diff --git a/configs/config.default.yml b/configs/config.default.yml index 81bbab5f..b510ac8e 100644 --- a/configs/config.default.yml +++ b/configs/config.default.yml @@ -12,6 +12,7 @@ Watchman: IncludedLists: - "us_csl" - "us_ofac" + - "ru_fa" PostalPool: Enabled: false diff --git a/go.mod b/go.mod index eb4f2e87..2efdfe69 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,8 @@ module github.com/moov-io/watchman -go 1.22.9 +go 1.23 -toolchain go1.23.4 +toolchain go1.24.0 require ( fyne.io/fyne/v2 v2.5.3 @@ -25,13 +25,15 @@ require ( go.opentelemetry.io/otel v1.34.0 go.opentelemetry.io/otel/trace v1.34.0 go.uber.org/automaxprocs v1.6.0 - golang.org/x/sync v0.10.0 - golang.org/x/text v0.21.0 + golang.org/x/sync v0.11.0 + golang.org/x/text v0.22.0 ) require ( fyne.io/systray v1.11.0 // indirect github.com/BurntSushi/toml v1.4.0 // indirect + github.com/PuerkitoBio/goquery v1.10.2 // indirect + github.com/andybalholm/cascadia v1.3.3 // indirect github.com/antchfx/xpath v1.3.2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect @@ -65,6 +67,7 @@ require ( github.com/magiconair/properties v1.8.7 // indirect github.com/markbates/pkger v0.17.1 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/mozillazg/go-unidecode v0.2.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/nicksnyder/go-i18n/v2 v2.4.0 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect @@ -97,8 +100,8 @@ require ( golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/image v0.18.0 // indirect golang.org/x/mobile v0.0.0-20231127183840-76ac6878050a // indirect - golang.org/x/net v0.33.0 // indirect - golang.org/x/sys v0.28.0 // indirect + golang.org/x/net v0.35.0 // indirect + golang.org/x/sys v0.30.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240924160255-9d4c2d233b61 // indirect google.golang.org/genproto/googleapis/rpc 
v0.0.0-20240930140551-af27646dc61f // indirect google.golang.org/grpc v1.67.0 // indirect diff --git a/go.sum b/go.sum index 0e0e8de1..f8e99925 100644 --- a/go.sum +++ b/go.sum @@ -47,8 +47,12 @@ github.com/BurntSushi/toml v1.4.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/JaderDias/movingmedian v0.0.0-20220813210630-d8c6b6de8835 h1:mbxQnovjDz5SvlatpxkbiMvybHH1hsSEu6OhPDLlfU8= github.com/JaderDias/movingmedian v0.0.0-20220813210630-d8c6b6de8835/go.mod h1:zsfWLaDctbM7aV1TsQAwkVswuKQ0k7PK4rjC1VZqpbI= +github.com/PuerkitoBio/goquery v1.10.2 h1:7fh2BdHcG6VFZsK7toXBT/Bh1z5Wmy8Q9MV9HqT2AM8= +github.com/PuerkitoBio/goquery v1.10.2/go.mod h1:0guWGjcLu9AYC7C1GHnpysHy056u9aEkUHwhdnePMCU= github.com/abadojack/whatlanggo v1.0.1 h1:19N6YogDnf71CTHm3Mp2qhYfkRdyvbgwWdd2EPxJRG4= github.com/abadojack/whatlanggo v1.0.1/go.mod h1:66WiQbSbJBIlOZMsvbKe5m6pzQovxCH9B/K8tQB2uoc= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= github.com/antchfx/htmlquery v1.3.3 h1:x6tVzrRhVNfECDaVxnZi1mEGrQg3mjE/rxbH2Pe6dNE= github.com/antchfx/htmlquery v1.3.3/go.mod h1:WeU3N7/rL6mb6dCwtE30dURBnBieKDC/fR8t6X+cKjU= github.com/antchfx/xpath v1.3.2 h1:LNjzlsSjinu3bQpw9hWMY9ocB80oLOWuQqFvO6xt51U= @@ -306,6 +310,8 @@ github.com/moov-io/base v0.53.0 h1:rpPWEbd/NTWApLzFq2AYbCZUlIv99OtvQcan7yArJVE= github.com/moov-io/base v0.53.0/go.mod h1:F2cdACBgJHNemPrOxvc88ezIqFL6ymErB4hOuPR+axg= github.com/moov-io/iso3166 v0.2.1 h1:kfj6tj6pupSuRB/q8sOSnMtHaAMqZoTx6RRB1Nw/Uzs= github.com/moov-io/iso3166 v0.2.1/go.mod h1:2fyJk2mvovjij1ya1/9gSuB2li1ZIm9jB2O/YusZQHQ= +github.com/mozillazg/go-unidecode v0.2.0 h1:vFGEzAH9KSwyWmXCOblazEWDh7fOkpmy/Z4ArmamSUc= +github.com/mozillazg/go-unidecode v0.2.0/go.mod h1:zB48+/Z5toiRolOZy9ksLryJ976VIwmDmpQ2quyt1aA= 
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo= @@ -465,6 +471,10 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -509,6 +519,10 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod 
v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -547,9 +561,16 @@ golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLd golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod 
h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -574,8 +595,14 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -624,11 +651,23 @@ golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
+golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -639,8 +678,14 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod 
h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -698,6 +743,9 @@ golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.8-0.20211022200916-316ba0b74098/go.mod h1:LGqMHiF4EqQNHR1JncWGqT5BVaXmza+X+BDGol+dOxo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/download/download.go b/internal/download/download.go index 
57c3a308..07d0036b 100644 --- a/internal/download/download.go +++ b/internal/download/download.go @@ -11,6 +11,7 @@ import ( "github.com/moov-io/base/telemetry" "github.com/moov-io/watchman/pkg/csl_us" "github.com/moov-io/watchman/pkg/ofac" + "github.com/moov-io/watchman/pkg/ru_fa" "github.com/moov-io/watchman/pkg/search" "github.com/moov-io/base/log" @@ -69,6 +70,19 @@ func (dl *downloader) RefreshAll(ctx context.Context) (Stats, error) { // Create a WaitGroup to track all producers var producerWg sync.WaitGroup + // ru_fa Records + if slices.Contains(dl.conf.IncludedLists, search.SourceRUFA) { + producerWg.Add(1) + g.Go(func() error { + defer producerWg.Done() + err := loadRUFARecords(ctx, logger, dl.conf, preparedLists) + if err != nil { + return fmt.Errorf("loading ru_fa records: %w", err) + } + return nil + }) + } + // OFAC Records if slices.Contains(dl.conf.IncludedLists, search.SourceUSOFAC) { producerWg.Add(1) @@ -124,6 +138,42 @@ type preparedList struct { Hash string } +func loadRUFARecords(ctx context.Context, logger log.Logger, conf Config, responseCh chan preparedList) error { + ctx, span := telemetry.StartSpan(ctx, "load-rufa-records") + defer span.End() + + start := time.Now() + files, err := ru_fa.Download(ctx, logger, conf.InitialDataDirectory) + if err != nil { + return fmt.Errorf("RUFA download: %v", err) + } + defer files.Close() + + span.AddEvent("finished downloading") + + if len(files) == 0 { + return fmt.Errorf("unexpected %d OFAC files found", len(files)) + } + + logger.Debug().Logf("finished RUFA download: %v", time.Since(start)) + start = time.Now() + + tbls, err := ru_fa.Read(files) + if err != nil { + return fmt.Errorf("parsing RUFA: %w", err) + } + span.AddEvent("finished parsing") + + var entities = ru_fa.WrapEntities(tbls) + + responseCh <- preparedList{ + ListName: search.SourceRUFA, + Entities: entities, + Hash: "xxx", + } + return nil +} + func loadOFACRecords(ctx context.Context, logger log.Logger, conf Config, responseCh chan 
preparedList) error { ctx, span := telemetry.StartSpan(ctx, "load-ofac-records") defer span.End() diff --git a/pkg/ru_fa/download.go b/pkg/ru_fa/download.go new file mode 100644 index 00000000..050e597e --- /dev/null +++ b/pkg/ru_fa/download.go @@ -0,0 +1,20 @@ +// Copyright 2020 The Moov Authors +// Use of this source code is governed by an Apache License +// license that can be found in the LICENSE file. + +package ru_fa + +import ( + "context" + "github.com/moov-io/base/log" + "github.com/moov-io/watchman/pkg/download" +) + +func Download(ctx context.Context, logger log.Logger, initialDir string) (download.Files, error) { + dl := download.New(logger, download.HTTPClient) + + addrs := make(map[string]string) + addrs[htmlFilename] = urlToDownload + + return dl.GetFiles(ctx, initialDir, addrs) +} diff --git a/pkg/ru_fa/mapper.go b/pkg/ru_fa/mapper.go new file mode 100644 index 00000000..ffc835e1 --- /dev/null +++ b/pkg/ru_fa/mapper.go @@ -0,0 +1,72 @@ +package ru_fa + +import ( + "fmt" + "github.com/moov-io/watchman/pkg/search" +) + +func WrapEntities(tbls Tables) []search.Entity[search.Value] { + var entities []search.Entity[search.Value] + + // Wrap Non-commercial Organizations (assumed as organizations) + for i, org := range tbls.NonCommercialOrgs { + entity := search.Entity[search.Value]{ + Name: org.NameTranslit, + Type: search.EntityOrganization, + Source: search.SourceRUFA, + SourceID: fmt.Sprintf("ru_nc_%d", i+1), + SourceData: org, // org is automatically upcast to search.Value + } + entities = append(entities, entity) + } + + // Wrap Mass Media (assumed as organizations) + for i, m := range tbls.MassMedias { + entity := search.Entity[search.Value]{ + Name: m.NameTranslit, + Type: search.EntityOrganization, + Source: search.SourceRUFA, + SourceID: fmt.Sprintf("ru_mm_%d", i+1), + SourceData: m, + } + entities = append(entities, entity) + } + + // Wrap Media Individuals (assumed as persons) + for i, mi := range tbls.MediaIndividuals { + entity := 
search.Entity[search.Value]{ + Name: mi.NameTranslit, + Type: search.EntityPerson, + Source: search.SourceRUFA, + SourceID: fmt.Sprintf("ru_mi_%d", i+1), + SourceData: mi, + } + entities = append(entities, entity) + } + + // Wrap Foreign Agent Individuals (assumed as persons) + for i, fai := range tbls.ForeignAgentIndividuals { + entity := search.Entity[search.Value]{ + Name: fai.NameTranslit, + Type: search.EntityPerson, + Source: search.SourceRUFA, + SourceID: fmt.Sprintf("ru_fai_%d", i+1), + SourceData: fai, + } + entities = append(entities, entity) + } + + // Wrap Unregistered Associations (assumed as organizations) + for i, ua := range tbls.UnregisteredAssociations { + entity := search.Entity[search.Value]{ + Name: ua.NameTranslit, + Type: search.EntityOrganization, + Source: search.SourceRUFA, + SourceID: fmt.Sprintf("ru_ua_%d", i+1), + SourceData: ua, + } + entities = append(entities, entity) + } + + return entities +} diff --git a/pkg/ru_fa/reader.go b/pkg/ru_fa/reader.go new file mode 100644 index 00000000..778eaa4f --- /dev/null +++ b/pkg/ru_fa/reader.go @@ -0,0 +1,292 @@ +package ru_fa + +import ( + "fmt" + "io" + "path/filepath" + "strings" + "time" + + "github.com/moov-io/watchman/pkg/download" + + "github.com/PuerkitoBio/goquery" + "github.com/mozillazg/go-unidecode" +) + +// ---------- Constants and Helper Functions ---------- + +const ( + urlToDownload = "https://ru.ruwiki.ru/wiki/%D0%A1%D0%BF%D0%B8%D1%81%D0%BE%D0%BA_%D0%B8%D0%BD%D0%BE%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%BD%D1%8B%D1%85_%D0%B0%D0%B3%D0%B5%D0%BD%D1%82%D0%BE%D0%B2_(%D0%A0%D0%BE%D1%81%D1%81%D0%B8%D1%8F)" + htmlFilename = "page.html" + dateLayout = "02.01.2006" // Adjust if necessary. +) + +// normalizeName replaces « and » with a standard double quote. +func normalizeName(s string) string { + s = strings.ReplaceAll(s, "«", "\"") + s = strings.ReplaceAll(s, "»", "\"") + return s +} + +// transliterate converts a Russian string into Latin using go-unidecode. 
+func transliterate(s string) string { + return unidecode.Unidecode(s) +} + +// matchHeaders returns true if the two slices match element-by-element (after trimming). +func matchHeaders(actual, expected []string) bool { + if len(actual) != len(expected) { + return false + } + for i := range actual { + if strings.TrimSpace(actual[i]) != expected[i] { + return false + } + } + return true +} + +// isInclusionDateFuture returns true if dateStr (if non-empty) parsed in loc is after now. +func isInclusionDateFuture(dateStr string, now time.Time, loc *time.Location) bool { + dateStr = strings.TrimSpace(dateStr) + if dateStr == "" { + return false + } + t, err := time.ParseInLocation(dateLayout, dateStr, loc) + if err != nil { + return false + } + return t.After(now) +} + +// isExclusionDatePast returns true if dateStr (if non-empty) parsed in loc is before now. +func isExclusionDatePast(dateStr string, now time.Time, loc *time.Location) bool { + dateStr = strings.TrimSpace(dateStr) + if dateStr == "" { + return false + } + t, err := time.ParseInLocation(dateLayout, dateStr, loc) + if err != nil { + return false + } + return t.Before(now) +} + +// ---------- Expected Table Headers ---------- + +var headerNonCommercial = []string{"№", "Наименование", "Адрес", "ИНН", "Реестр.№", "Дата включения в реестр", "Дата исключения из реестра"} +var headerMassMedia = []string{"№", "Название", "Дата включения в реестр[1]", "Дата исключения из реестра"} +var headerMediaIndividual = []string{"№", "Имя", "Род деятельности", "Дата включения в реестр[1]", "Дата исключения из реестра"} +var headerForeignAgentIndividual = []string{"№", "", "", "Сведения об иностранных источниках", "Информация об осуществлении политической деятельности и (или) целенаправленном сборе сведений", "Дата включения в реестр[2]"} +var headerUnregisteredAssociation = []string{"№", "Наименование", "Дата включения в реестр", "Цели деятельности объединения", "Сведения об источниках формирования денежных средств и (или) 
// Row types, one per recognised register table. Every field holds the cell
// text as scraped; dates are kept as strings and are not normalised here.
// NameTranslit is the Latin transliteration of Name produced while parsing.
type (
	// NonCommercialOrganization is a row of the table matched by
	// headerNonCommercial.
	NonCommercialOrganization struct {
		Number, Name, NameTranslit   string
		Address, INN, RegistryNo     string
		InclusionDate, ExclusionDate string
	}

	// MassMedia is a row of the table matched by headerMassMedia.
	MassMedia struct {
		Number, Name, NameTranslit   string
		InclusionDate, ExclusionDate string
	}

	// MediaIndividual is a row of the table matched by headerMediaIndividual.
	MediaIndividual struct {
		Number, Name, NameTranslit   string
		Activity                     string
		InclusionDate, ExclusionDate string
	}

	// ForeignAgentIndividual is a row of the table matched by
	// headerForeignAgentIndividual. In this table, the second column is Name
	// and the third is Activity.
	ForeignAgentIndividual struct {
		Number, Name, NameTranslit            string
		Activity                              string
		ForeignSources, PoliticalActivityInfo string
		InclusionDate                         string
	}

	// UnregisteredAssociation is a row of the table matched by
	// headerUnregisteredAssociation.
	UnregisteredAssociation struct {
		Number, Name, NameTranslit   string
		InclusionDate                string
		AssociationGoals, SourceInfo string
	}
)
+type Tables struct { + NonCommercialOrgs []NonCommercialOrganization + MassMedias []MassMedia + MediaIndividuals []MediaIndividual + ForeignAgentIndividuals []ForeignAgentIndividual + UnregisteredAssociations []UnregisteredAssociation +} + +func Read(files download.Files) (Tables, error) { + for filename, file := range files { + switch strings.ToLower(filepath.Base(filename)) { + case htmlFilename: + tables, err := htmlFile(file) + if err != nil { + return tables, fmt.Errorf("%s: %v", htmlFilename, err) + } + return tables, nil + default: + var tables Tables + file.Close() + return tables, fmt.Errorf("error: file %s does not have a handler for processing", filename) + } + } + + var tables Tables + return tables, fmt.Errorf("error: file %s not found", htmlFilename) +} + +func htmlFile(f io.ReadCloser) (Tables, error) { + defer f.Close() + var tables Tables + + // Load Moscow timezone. + moscowLoc, err := time.LoadLocation("Europe/Moscow") + if err != nil { + return tables, fmt.Errorf("loading timezone: %v", err) + } + now := time.Now().In(moscowLoc) + + doc, err := goquery.NewDocumentFromReader(f) + if err != nil { + return tables, fmt.Errorf("parsing HTML: %v", err) + } + + // Iterate over every table. 
+ doc.Find("table").Each(func(i int, tableSel *goquery.Selection) { + var rows [][]string + tableSel.Find("tr").Each(func(j int, rowSel *goquery.Selection) { + var row []string + rowSel.Find("th, td").Each(func(k int, cellSel *goquery.Selection) { + row = append(row, strings.TrimSpace(cellSel.Text())) + }) + if len(row) > 0 { + rows = append(rows, row) + } + }) + if len(rows) < 2 { + return + } + header := rows[0] + switch { + case matchHeaders(header, headerNonCommercial): + for _, row := range rows[1:] { + if len(row) < 7 { + continue + } + if isInclusionDateFuture(row[5], now, moscowLoc) || isExclusionDatePast(row[6], now, moscowLoc) { + continue + } + entry := NonCommercialOrganization{ + Number: row[0], + Name: row[1], + NameTranslit: transliterate(normalizeName(row[1])), + Address: row[2], + INN: row[3], + RegistryNo: row[4], + InclusionDate: row[5], + ExclusionDate: row[6], + } + tables.NonCommercialOrgs = append(tables.NonCommercialOrgs, entry) + } + case matchHeaders(header, headerMassMedia): + for _, row := range rows[1:] { + if len(row) < 4 { + continue + } + if isInclusionDateFuture(row[2], now, moscowLoc) || isExclusionDatePast(row[3], now, moscowLoc) { + continue + } + entry := MassMedia{ + Number: row[0], + Name: row[1], + NameTranslit: transliterate(normalizeName(row[1])), + InclusionDate: row[2], + ExclusionDate: row[3], + } + tables.MassMedias = append(tables.MassMedias, entry) + } + case matchHeaders(header, headerMediaIndividual): + for _, row := range rows[1:] { + if len(row) < 5 { + continue + } + if isInclusionDateFuture(row[3], now, moscowLoc) || isExclusionDatePast(row[4], now, moscowLoc) { + continue + } + entry := MediaIndividual{ + Number: row[0], + Name: row[1], + NameTranslit: transliterate(normalizeName(row[1])), + Activity: row[2], + InclusionDate: row[3], + ExclusionDate: row[4], + } + tables.MediaIndividuals = append(tables.MediaIndividuals, entry) + } + case matchHeaders(header, headerForeignAgentIndividual): + // In this table, 
the second column is Name and the third is Activity. + for _, row := range rows[1:] { + if len(row) < 6 { + continue + } + if isInclusionDateFuture(row[5], now, moscowLoc) { + continue + } + entry := ForeignAgentIndividual{ + Number: row[0], + Name: row[1], + NameTranslit: transliterate(normalizeName(row[1])), + Activity: row[2], + ForeignSources: row[3], + PoliticalActivityInfo: row[4], + InclusionDate: row[5], + } + tables.ForeignAgentIndividuals = append(tables.ForeignAgentIndividuals, entry) + } + case matchHeaders(header, headerUnregisteredAssociation): + for _, row := range rows[1:] { + if len(row) < 5 { + continue + } + if isInclusionDateFuture(row[2], now, moscowLoc) { + continue + } + entry := UnregisteredAssociation{ + Number: row[0], + Name: row[1], + NameTranslit: transliterate(normalizeName(row[1])), + InclusionDate: row[2], + AssociationGoals: row[3], + SourceInfo: row[4], + } + tables.UnregisteredAssociations = append(tables.UnregisteredAssociations, entry) + } + } + }) + + return tables, nil +} diff --git a/pkg/search/models.go b/pkg/search/models.go index ac983731..ca2f55c1 100644 --- a/pkg/search/models.go +++ b/pkg/search/models.go @@ -56,6 +56,7 @@ var ( SourceUKCSL SourceList = "uk_csl" SourceUSCSL SourceList = "us_csl" SourceUSOFAC SourceList = "us_ofac" + SourceRUFA SourceList = "ru_fa" ) type Person struct {