Replace linkRegex with xurls library (#6261)

* Replace linkRegex with xurls library

Rather than maintaining a complicated regex to match URLs for
autolinking, gitea can use this existing go library that takes care of
the matching with very little code change to gitea itself. After
spending a while trying to find the perfect regex for all cases this library
still works better as it is more flexible than a single regex ever will be.

This will also fix the following issues: #5844 #3095 #3381

This passes all our current tests and I've added new ones mentioned in
those issues as well.

* Use xurls.StrictMatchingScheme instead of xurls.Strict

This is much faster and we only care about https? links to preserve
existing behavior.
This commit is contained in:
mrsdizzie
2019-03-07 15:12:01 -05:00
committed by techknowlogick
parent 01bd1fcd33
commit f2de5dc8c8
9 changed files with 2038 additions and 3 deletions

View File

@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/util"
"github.com/Unknwon/com"
"github.com/mvdan/xurls"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@ -64,9 +65,7 @@ var (
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
// matches http/https links. used for autlinking those. partly modified from
// the original present in autolink.js
linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`)
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
)
// regexp for full links to issues/pulls

View File

@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) {
test(
"http://142.42.1.1/",
`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`)
test(
"https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd",
`<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`)
test(
"https://en.wikipedia.org/wiki/URL_(disambiguation)",
`<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`)
test(
"https://foo_bar.example.com/",
`<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`)
// Test that should *not* be turned into URL
test(