From 6d0e2d8907e76a701834b8c2f559a112acb2ac2a Mon Sep 17 00:00:00 2001 From: vCaesar Date: Fri, 9 Jun 2017 22:32:53 +0800 Subject: [PATCH] Remove 07.3.md spaces --- en/07.3.md | 312 ++++++++++++++++++++++++++--------------------------- 1 file changed, 156 insertions(+), 156 deletions(-) diff --git a/en/07.3.md b/en/07.3.md index 949c71f4..704347cf 100644 --- a/en/07.3.md +++ b/en/07.3.md @@ -11,33 +11,33 @@ If you recall form validation from previous sections, we used Regexp to verify t The `regexp` package has 3 functions to match: if it matches a pattern, then it returns true, returning false otherwise. ```Go - func Match(pattern string, b []byte) (matched bool, error error) - func MatchReader(pattern string, r io.RuneReader) (matched bool, error error) - func MatchString(pattern string, s string) (matched bool, error error) +func Match(pattern string, b []byte) (matched bool, error error) +func MatchReader(pattern string, r io.RuneReader) (matched bool, error error) +func MatchString(pattern string, s string) (matched bool, error error) ``` All 3 functions check if `pattern` matches the input source, returning true if it matches. However if your Regex has syntax errors, it will return an error. The 3 input sources of these functions are `slice of byte`, `RuneReader` and `string`. Here is an example of how to verify an IP address: ```Go - func IsIP(ip string) (b bool) { - if m, _ := regexp.MatchString("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$", ip); !m { - return false - } - return true +func IsIP(ip string) (b bool) { + if m, _ := regexp.MatchString("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$", ip); !m { + return false } + return true +} ``` As you can see, using pattern in the `regexp` package is not that different. Here's one more example on verifying whether user input is valid: ```Go - func main() { - if len(os.Args) == 1 { - fmt.Println("Usage: regexp [string]") - os.Exit(1) - } else if m, _ := regexp.MatchString("^[0-9]+$", os.Args[1]); m { - fmt.Println("Number") - } else { - fmt.Println("Not number") - } +func main() { + if len(os.Args) == 1 { + fmt.Println("Usage: regexp [string]") + os.Exit(1) + } else if m, _ := regexp.MatchString("^[0-9]+$", os.Args[1]); m { + fmt.Println("Number") + } else { + fmt.Println("Not number") } +} ``` In the above examples, we use `Match(Reader|String)` to check if content is valid, but they are all easy to use. @@ -47,189 +47,189 @@ Match mode can verify content but it cannot cut, filter or collect data from it. Let's say we need to write a crawler. Here is an example for when you must use Regexp to filter and cut data. ```Go - package main +package main - import ( - "fmt" - "io/ioutil" - "net/http" - "regexp" - "strings" - ) +import ( + "fmt" + "io/ioutil" + "net/http" + "regexp" + "strings" +) - func main() { - resp, err := http.Get("http://www.baidu.com") - if err != nil { - fmt.Println("http get error.") - } - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - fmt.Println("http read error") - return - } - - src := string(body) - - // Convert HTML tags to lower case. - re, _ := regexp.Compile("\\<[\\S\\s]+?\\>") - src = re.ReplaceAllStringFunc(src, strings.ToLower) - - // Remove STYLE. - re, _ = regexp.Compile("\\") - src = re.ReplaceAllString(src, "") - - // Remove SCRIPT. - re, _ = regexp.Compile("\\") - src = re.ReplaceAllString(src, "") - - // Remove all HTML code in angle brackets, and replace with newline. - re, _ = regexp.Compile("\\<[\\S\\s]+?\\>") - src = re.ReplaceAllString(src, "\n") - - // Remove continuous newline. - re, _ = regexp.Compile("\\s{2,}") - src = re.ReplaceAllString(src, "\n") - - fmt.Println(strings.TrimSpace(src)) +func main() { + resp, err := http.Get("http://www.baidu.com") + if err != nil { + fmt.Println("http get error.") } + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + fmt.Println("http read error") + return + } + + src := string(body) + + // Convert HTML tags to lower case. + re, _ := regexp.Compile("\\<[\\S\\s]+?\\>") + src = re.ReplaceAllStringFunc(src, strings.ToLower) + + // Remove STYLE. + re, _ = regexp.Compile("\\") + src = re.ReplaceAllString(src, "") + + // Remove SCRIPT. + re, _ = regexp.Compile("\\") + src = re.ReplaceAllString(src, "") + + // Remove all HTML code in angle brackets, and replace with newline. + re, _ = regexp.Compile("\\<[\\S\\s]+?\\>") + src = re.ReplaceAllString(src, "\n") + + // Remove continuous newline. + re, _ = regexp.Compile("\\s{2,}") + src = re.ReplaceAllString(src, "\n") + + fmt.Println(strings.TrimSpace(src)) +} ``` In this example, we use Compile as the first step for complex mode. It verifies that your Regex syntax is correct, then returns a `Regexp` for parsing content in other operations. Here are some functions to parse your Regexp syntax: ```Go - func Compile(expr string) (*Regexp, error) - func CompilePOSIX(expr string) (*Regexp, error) - func MustCompile(str string) *Regexp - func MustCompilePOSIX(str string) *Regexp +func Compile(expr string) (*Regexp, error) +func CompilePOSIX(expr string) (*Regexp, error) +func MustCompile(str string) *Regexp +func MustCompilePOSIX(str string) *Regexp ``` The difference between `ComplePOSIX` and `Compile` is that the former has to use POSIX syntax which is leftmost longest search, and the latter is only leftmost search. For instance, for Regexp `[a-z]{2,4}` and content `"aa09aaa88aaaa"`, `CompilePOSIX` returns `aaaa` but `Compile` returns `aa`. `Must` prefix means panic when the Regexp syntax is not correct, returning error otherwise. Now that we know how to create a new Regexp, let's see how the methods provided by this struct can help us to operate on content: ```Go - func (re *Regexp) Find(b []byte) []byte - func (re *Regexp) FindAll(b []byte, n int) [][]byte - func (re *Regexp) FindAllIndex(b []byte, n int) [][]int - func (re *Regexp) FindAllString(s string, n int) []string - func (re *Regexp) FindAllStringIndex(s string, n int) [][]int - func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string - func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int - func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte - func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int - func (re *Regexp) FindIndex(b []byte) (loc []int) - func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) - func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int - func (re *Regexp) FindString(s string) string - func (re *Regexp) FindStringIndex(s string) (loc []int) - func (re *Regexp) FindStringSubmatch(s string) []string - func (re *Regexp) FindStringSubmatchIndex(s string) []int - func (re *Regexp) FindSubmatch(b []byte) [][]byte - func (re *Regexp) FindSubmatchIndex(b []byte) []int +func (re *Regexp) Find(b []byte) []byte +func (re *Regexp) FindAll(b []byte, n int) [][]byte +func (re *Regexp) FindAllIndex(b []byte, n int) [][]int +func (re *Regexp) FindAllString(s string, n int) []string +func (re *Regexp) FindAllStringIndex(s string, n int) [][]int +func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string +func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int +func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte +func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int +func (re *Regexp) FindIndex(b []byte) (loc []int) +func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) +func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int +func (re *Regexp) FindString(s string) string +func (re *Regexp) FindStringIndex(s string) (loc []int) +func (re *Regexp) FindStringSubmatch(s string) []string +func (re *Regexp) FindStringSubmatchIndex(s string) []int +func (re *Regexp) FindSubmatch(b []byte) [][]byte +func (re *Regexp) FindSubmatchIndex(b []byte) []int ``` These 18 methods include identical functions for different input sources (byte slice, string and io.RuneReader), so we can really simplify this list by ignoring input sources as follows: ```Go - func (re *Regexp) Find(b []byte) []byte - func (re *Regexp) FindAll(b []byte, n int) [][]byte - func (re *Regexp) FindAllIndex(b []byte, n int) [][]int - func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte - func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int - func (re *Regexp) FindIndex(b []byte) (loc []int) - func (re *Regexp) FindSubmatch(b []byte) [][]byte - func (re *Regexp) FindSubmatchIndex(b []byte) []int +func (re *Regexp) Find(b []byte) []byte +func (re *Regexp) FindAll(b []byte, n int) [][]byte +func (re *Regexp) FindAllIndex(b []byte, n int) [][]int +func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte +func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int +func (re *Regexp) FindIndex(b []byte) (loc []int) +func (re *Regexp) FindSubmatch(b []byte) [][]byte +func (re *Regexp) FindSubmatchIndex(b []byte) []int ``` Code sample: ```Go - package main +package main - import ( - "fmt" - "regexp" - ) +import ( + "fmt" + "regexp" +) - func main() { - a := "I am learning Go language" +func main() { + a := "I am learning Go language" - re, _ := regexp.Compile("[a-z]{2,4}") + re, _ := regexp.Compile("[a-z]{2,4}") - // Find the first match. - one := re.Find([]byte(a)) - fmt.Println("Find:", string(one)) + // Find the first match. + one := re.Find([]byte(a)) + fmt.Println("Find:", string(one)) - // Find all matches and save to a slice, n less than 0 means return all matches, indicates length of slice if it's greater than 0. - all := re.FindAll([]byte(a), -1) - fmt.Println("FindAll", all) + // Find all matches and save to a slice, n less than 0 means return all matches, indicates length of slice if it's greater than 0. + all := re.FindAll([]byte(a), -1) + fmt.Println("FindAll", all) - // Find index of first match, start and end position. - index := re.FindIndex([]byte(a)) - fmt.Println("FindIndex", index) + // Find index of first match, start and end position. + index := re.FindIndex([]byte(a)) + fmt.Println("FindIndex", index) - // Find index of all matches, the n does same job as above. - allindex := re.FindAllIndex([]byte(a), -1) - fmt.Println("FindAllIndex", allindex) + // Find index of all matches, the n does same job as above. + allindex := re.FindAllIndex([]byte(a), -1) + fmt.Println("FindAllIndex", allindex) - re2, _ := regexp.Compile("am(.*)lang(.*)") + re2, _ := regexp.Compile("am(.*)lang(.*)") - // Find first submatch and return array, the first element contains all elements, the second element contains the result of first (), the third element contains the result of second (). - // Output: - // the first element: "am learning Go language" - // the second element: " learning Go ", notice spaces will be outputed as well. - // the third element: "uage" - submatch := re2.FindSubmatch([]byte(a)) - fmt.Println("FindSubmatch", submatch) - for _, v := range submatch { - fmt.Println(string(v)) - } - - // Same as FindIndex(). - submatchindex := re2.FindSubmatchIndex([]byte(a)) - fmt.Println(submatchindex) - - // FindAllSubmatch, find all submatches. - submatchall := re2.FindAllSubmatch([]byte(a), -1) - fmt.Println(submatchall) - - // FindAllSubmatchIndex,find index of all submatches. - submatchallindex := re2.FindAllSubmatchIndex([]byte(a), -1) - fmt.Println(submatchallindex) + // Find first submatch and return array, the first element contains all elements, the second element contains the result of first (), the third element contains the result of second (). + // Output: + // the first element: "am learning Go language" + // the second element: " learning Go ", notice spaces will be outputed as well. + // the third element: "uage" + submatch := re2.FindSubmatch([]byte(a)) + fmt.Println("FindSubmatch", submatch) + for _, v := range submatch { + fmt.Println(string(v)) } + + // Same as FindIndex(). + submatchindex := re2.FindSubmatchIndex([]byte(a)) + fmt.Println(submatchindex) + + // FindAllSubmatch, find all submatches. + submatchall := re2.FindAllSubmatch([]byte(a), -1) + fmt.Println(submatchall) + + // FindAllSubmatchIndex,find index of all submatches. + submatchallindex := re2.FindAllSubmatchIndex([]byte(a), -1) + fmt.Println(submatchallindex) +} ``` As we've previously mentioned, Regexp also has 3 methods for matching. They do the exact same thing as the exported functions. In fact, those exported functions actually call these methods under the hood: ```Go - func (re *Regexp) Match(b []byte) bool - func (re *Regexp) MatchReader(r io.RuneReader) bool - func (re *Regexp) MatchString(s string) bool +func (re *Regexp) Match(b []byte) bool +func (re *Regexp) MatchReader(r io.RuneReader) bool +func (re *Regexp) MatchString(s string) bool ``` Next, let's see how to replace strings using Regexp: ```Go - func (re *Regexp) ReplaceAll(src, repl []byte) []byte - func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte - func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte - func (re *Regexp) ReplaceAllLiteralString(src, repl string) string - func (re *Regexp) ReplaceAllString(src, repl string) string - func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string +func (re *Regexp) ReplaceAll(src, repl []byte) []byte +func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte +func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte +func (re *Regexp) ReplaceAllLiteralString(src, repl string) string +func (re *Regexp) ReplaceAllString(src, repl string) string +func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string ``` These are used in the crawling example, so we will not explain any further here. Let's take a look at the definition of `Expand`: ```Go - func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte - func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte +func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte +func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte ``` So how do we use `Expand`? ```Go - func main() { - src := []byte(` - call hello alice - hello bob - call hello eve - `) - pat := regexp.MustCompile(`(?m)(call)\s+(?P\w+)\s+(?P.+)\s*$`) - res := []byte{} - for _, s := range pat.FindAllSubmatchIndex(src, -1) { - res = pat.Expand(res, []byte("$cmd('$arg')\n"), src, s) - } - fmt.Println(string(res)) +func main() { + src := []byte(` + call hello alice + hello bob + call hello eve + `) + pat := regexp.MustCompile(`(?m)(call)\s+(?P\w+)\s+(?P.+)\s*$`) + res := []byte{} + for _, s := range pat.FindAllSubmatchIndex(src, -1) { + res = pat.Expand(res, []byte("$cmd('$arg')\n"), src, s) } + fmt.Println(string(res)) +} ``` At this point, you've learnt the whole `regexp` package in Go. I hope that you can understand more by studying examples of key methods, so that you can do something interesting on your own.