String Processing
String Basics Review
A Go string is an immutable sequence of bytes. Internally it is similar to []byte, but once created its contents cannot be modified.
package main
import "fmt"
// main shows that a string is immutable while a []byte copy of it is not,
// and that indexing a string yields a single byte.
func main() {
	original := "Hello, World"

	// Converting to []byte produces a separate, writable copy.
	buf := []byte(original)
	buf[0] = 'h'
	fmt.Println(string(buf)) // hello, World

	// Editing the copy leaves the source string untouched.
	fmt.Println(original) // Hello, World

	// An index expression on a string returns the byte at that offset.
	first := original[0]
	fmt.Printf("s[0] = %d (%c)\n", first, first) // 72 (H)
}
len(s) vs utf8.RuneCountInString(s)
len(s) returns the number of bytes in the string. In UTF-8, non-ASCII characters (such as CJK characters and emoji) occupy multiple bytes, so the byte count differs from the actual character count.
package main
import (
"fmt"
"unicode/utf8"
)
// main prints a small table comparing, for each sample string, the byte
// length returned by len with the rune count returned by
// utf8.RuneCountInString.
func main() {
	examples := []string{
		"Hello", // ASCII only
		"안녕하세요", // 5 Korean characters
		"Hello 世界", // ASCII + CJK
		"Go 🚀", // ASCII + emoji
	}

	fmt.Printf("%-15s %10s %10s\n", "String", "Bytes", "Chars")
	fmt.Println("-----------------------------------")
	for _, s := range examples {
		byteLen := len(s)                    // size in bytes
		runeLen := utf8.RuneCountInString(s) // number of Unicode code points
		fmt.Printf("%-15s %10d %10d\n", s, byteLen, runeLen)
	}
}
Output:
String Bytes Chars
-----------------------------------
Hello 5 5
안녕하세요 15 5
Hello 世界 12 8
Go 🚀 7 4
Rune Iteration — Traversing Unicode Characters with range
When you iterate over a string with range, you get runes (Unicode code points), not bytes. The index is the byte position of that rune in the string; the value is the rune itself.
package main
import "fmt"
// main contrasts three ways of walking a string: range (rune by rune),
// a classic index loop (byte by byte), and conversion to []rune for
// position-based character access.
func main() {
	s := "Go Lang"

	// range iteration (recommended — rune-based)
	fmt.Println("=== range iteration (rune-based) ===")
	// range yields the byte offset of each rune, so the character ordinal
	// is tracked separately; the two diverge once s holds multi-byte runes.
	charIdx := 0
	for bytePos, r := range s {
		fmt.Printf("index %2d: byte_pos=%2d, rune=%c, U+%04X\n",
			charIdx, bytePos, r, r)
		charIdx++
	}

	// Direct byte iteration (may break on non-ASCII)
	fmt.Println("\n=== direct byte iteration ===")
	for i := 0; i < len(s); i++ {
		fmt.Printf("s[%d] = 0x%02X\n", i, s[i])
	}

	// Convert to []rune for index-based character access
	runes := []rune(s)
	fmt.Printf("\ntotal chars: %d\n", len(runes))
	fmt.Printf("last char: %c\n", runes[len(runes)-1])
}
strings Package — Complete Function Reference
Searching and Checking
package main
import (
"fmt"
"strings"
)
// main demonstrates the strings package helpers for containment checks,
// prefix/suffix tests, counting, and locating substrings.
func main() {
	s := "The quick brown fox jumps over the lazy dog"

	// Containment checks
	fmt.Println(strings.Contains(s, "fox"))      // true
	fmt.Println(strings.ContainsAny(s, "aeiou")) // true (contains a vowel)
	fmt.Println(strings.ContainsRune(s, 'ä'))    // false

	// Prefix / suffix
	fmt.Println(strings.HasPrefix(s, "The")) // true
	fmt.Println(strings.HasSuffix(s, "dog")) // true
	fmt.Println(strings.HasSuffix(s, "cat")) // false

	// Count occurrences
	fmt.Println(strings.Count(s, "the")) // 1 (case-sensitive)
	fmt.Println(strings.Count(s, "o"))   // 4

	// Find positions (byte offsets; -1 means "not found")
	fmt.Println(strings.Index(s, "fox"))     // 16
	fmt.Println(strings.LastIndex(s, "o"))   // 41
	fmt.Println(strings.Index(s, "cat"))     // -1 (not found)
	fmt.Println(strings.IndexByte(s, 'q'))   // 4
}
Transformations
package main
import (
"fmt"
"strings"
)
// main walks through the strings package transformation helpers: case
// conversion, trimming, prefix/suffix removal, replacement and repetition.
func main() {
	padded := " Hello, Go World! "

	// Case conversion
	for _, convert := range []func(string) string{strings.ToUpper, strings.ToLower} {
		fmt.Println(convert(padded))
	}
	// NOTE: strings.Title is deprecated since Go 1.18; it is shown here
	// for reference only.
	fmt.Println(strings.Title("hello world")) // "Hello World"

	// Trimming: TrimSpace strips surrounding whitespace, the Trim family
	// strips any characters from the given cutset.
	const starred = "***hello***"
	fmt.Printf("%q\n", strings.TrimSpace(padded))        // "Hello, Go World!"
	fmt.Printf("%q\n", strings.Trim(starred, "*"))       // "hello"
	fmt.Printf("%q\n", strings.TrimLeft(starred, "*"))   // "hello***"
	fmt.Printf("%q\n", strings.TrimRight(starred, "*"))  // "***hello"

	// Prefix / suffix removal (exact match, removed at most once)
	site := "https://example.com"
	withoutScheme := strings.TrimPrefix(site, "https://")
	withoutTLD := strings.TrimSuffix(site, ".com")
	fmt.Println(withoutScheme) // example.com
	fmt.Println(withoutTLD)    // https://example

	// Replacement
	const sample = "aababab"
	fmt.Println(strings.Replace(sample, "a", "x", 2)) // "xxbabab" (first 2 only)
	fmt.Println(strings.ReplaceAll(sample, "a", "x")) // "xxbxbxb" (all)

	// Repetition
	fmt.Println(strings.Repeat("Go!", 3)) // "Go!Go!Go!"
}
Splitting and Joining
package main
import (
"fmt"
"strings"
)
// main demonstrates the splitting and joining helpers of the strings
// package.
func main() {
	// Split — cut on every separator; empty pieces are kept.
	letters := strings.Split("a,b,c,d", ",")
	fmt.Println(letters)      // [a b c d]
	fmt.Println(len(letters)) // 4

	// SplitN — produce at most n pieces; the last holds the remainder.
	fmt.Println(strings.SplitN("a:b:c:d", ":", 3)) // [a b c:d]

	// SplitAfter — like Split, but each piece keeps its trailing separator.
	fmt.Println(strings.SplitAfter("a,b,c", ",")) // [a, b, c]

	// Fields — split on runs of whitespace, ignoring leading/trailing space.
	fmt.Println(strings.Fields(" foo bar baz ")) // [foo bar baz]

	// Join — glue a slice back together with a separator.
	fruits := []string{"apple", "banana", "cherry"}
	fmt.Println(strings.Join(fruits, ", ")) // apple, banana, cherry

	// Split lines
	const text = "line one\nline two\nline three"
	for idx, line := range strings.Split(text, "\n") {
		fmt.Printf("line %d: %s\n", idx+1, line)
	}
}
strings.Builder — Efficient String Concatenation
Repeatedly concatenating strings with + allocates new memory on every iteration. strings.Builder reuses an internal buffer, dramatically improving performance.
package main
import (
"fmt"
"strings"
"time"
)
// buildWithPlus returns "item0,item1,...," for the first n indices, built
// by plain string concatenation. It exists as the slow baseline for the
// comparison with buildWithBuilder.
func buildWithPlus(n int) string {
	var out string
	idx := 0
	for idx < n {
		out = out + fmt.Sprintf("item%d,", idx)
		idx++
	}
	return out
}
// buildWithBuilder returns "item0,item1,...," for the first n indices,
// formatting each piece directly into a strings.Builder.
func buildWithBuilder(n int) string {
	var sb strings.Builder
	idx := 0
	for idx < n {
		fmt.Fprintf(&sb, "item%d,", idx)
		idx++
	}
	return sb.String()
}
// main times the two concatenation strategies on the same workload and
// then walks through the individual strings.Builder write methods.
func main() {
	n := 10_000

	start := time.Now()
	_ = buildWithPlus(n)
	fmt.Printf("+ operator: %v\n", time.Since(start))

	start = time.Now()
	_ = buildWithBuilder(n)
	fmt.Printf("strings.Builder: %v\n", time.Since(start))

	// strings.Builder detailed usage
	var sb strings.Builder
	sb.WriteString("Hello")  // append a string
	sb.WriteRune(',')        // append a rune
	sb.WriteByte(' ')        // append a byte
	sb.WriteString("World!") // append a string
	fmt.Println(sb.String()) // Hello, World!
	fmt.Println("length:", sb.Len())

	// Reset empties the builder. Unlike bytes.Buffer.Reset, it releases the
	// underlying buffer rather than keeping it for reuse, so the next write
	// allocates again.
	sb.Reset()
	sb.WriteString("Reset!")
	fmt.Println(sb.String())
}
fmt Package — Complete Format Verb Reference
package main
import "fmt"
// Person is a small sample record; main prints it with the %v, %+v, %#v
// and %T verbs to show how fmt renders struct values.
type Person struct {
Name string
Age int
}
func main() {
p := Person{"Alice", 30}
x := 255
// General formats
fmt.Printf("%v\n", p) // {Alice 30}
fmt.Printf("%+v\n", p) // {Name:Alice Age:30}
fmt.Printf("%#v\n", p) // main.Person{Name:"Alice", Age:30}
fmt.Printf("%T\n", p) // main.Person
// Integer formats
fmt.Printf("%d\n", x) // 255 (decimal)
fmt.Printf("%b\n", x) // 11111111 (binary)
fmt.Printf("%o\n", x) // 377 (octal)
fmt.Printf("%x\n", x) // ff (hex lowercase)
fmt.Printf("%X\n", x) // FF (hex uppercase)
fmt.Printf("%08b\n", x) // 11111111 (width 8, zero-padded)
fmt.Printf("%-8d|\n", x) // "255 |" (left-aligned)
fmt.Printf("%+d\n", x) // +255 (always show sign)
// Floating-point formats
f := 3.14159265
fmt.Printf("%f\n", f) // 3.141593 (default 6 decimal places)
fmt.Printf("%.2f\n", f) // 3.14
fmt.Printf("%e\n", f) // 3.141593e+00 (scientific notation)
fmt.Printf("%g\n", f) // 3.14159265 (compact representation)
fmt.Printf("%9.2f\n", f) // " 3.14" (width 9, 2 decimal places)
// String formats
s := "Hello"
fmt.Printf("%s\n", s) // Hello
fmt.Printf("%q\n", s) // "Hello" (with quotes)
fmt.Printf("%10s\n", s) // " Hello" (right-aligned)
fmt.Printf("%-10s|\n", s) // "Hello |" (left-aligned)
// Miscellaneous
fmt.Printf("%p\n", &x) // pointer address (e.g. 0xc000012088)
fmt.Printf("%c\n", 65) // A (Unicode code point)
fmt.Printf("%U\n", 'δΈ') // U+4E16
}
Sprintf, Fprintf, Errorf
package main
import (
"fmt"
"os"
"strings"
)
// main shows the three non-printing members of the fmt formatting family:
// Sprintf (to a string), Fprintf (to any io.Writer) and Errorf (to an
// error, optionally wrapping another one).
func main() {
	// Sprintf — format into a new string instead of printing.
	name, age := "Alice", 30
	summary := fmt.Sprintf("Name: %s, Age: %d", name, age)
	fmt.Println(summary)

	// Fprintf — format into an io.Writer (file, network, buffer, etc.).
	var greeting strings.Builder
	fmt.Fprintf(&greeting, "Hello, %s!", name)
	fmt.Println(greeting.String())

	// The same call can target standard error.
	fmt.Fprintf(os.Stderr, "Warning: %s\n", "unexpected value")

	// Errorf — build an error value from a format string.
	userID := 42
	notFound := fmt.Errorf("user ID %d not found", userID)
	fmt.Println(notFound)

	// %w (Go 1.13+) wraps the cause so it stays reachable from the new error.
	cause := fmt.Errorf("database connection failed")
	wrapped := fmt.Errorf("error during service initialization: %w", cause)
	fmt.Println(wrapped)
}
strconv Package — Type Conversions
strconv provides functions for converting basic types to strings and parsing strings into basic types.
package main
import (
"fmt"
"strconv"
)
// main exercises the strconv conversions between strings and ints, floats
// and bools, including the error returned for unparsable input.
func main() {
	// Integer conversions

	// Itoa: int → string (Integer to ASCII)
	num := 42
	fmt.Printf("Itoa(%d) = %q\n", num, strconv.Itoa(num)) // "42"

	// Atoi: string → int (ASCII to Integer)
	digits := "123"
	if parsed, err := strconv.Atoi(digits); err == nil {
		fmt.Printf("Atoi(%q) = %d\n", digits, parsed) // 123
	} else {
		fmt.Println("error:", err)
	}

	// Invalid input
	_, err := strconv.Atoi("abc")
	fmt.Println("Atoi error:", err) // strconv.Atoi: parsing "abc": invalid syntax

	// ParseInt: specify base and bit size. The inputs are known-good
	// literals, so the error results are deliberately discarded.
	fromHex, _ := strconv.ParseInt("ff", 16, 64) // parse hexadecimal
	fmt.Println("0xff =", fromHex)               // 255
	fromBin, _ := strconv.ParseInt("1010", 2, 64) // parse binary
	fmt.Println("0b1010 =", fromBin)              // 10

	// FormatInt: integer → string in specified base
	// Prints "11111111", "ff", "377".
	for _, base := range []int{2, 16, 8} {
		fmt.Println(strconv.FormatInt(255, base))
	}

	// Float conversions
	pi := 3.14159
	fmt.Println(strconv.FormatFloat(pi, 'f', 2, 64)) // "3.14" (2 decimal places)
	parsedFloat, _ := strconv.ParseFloat("3.14159", 64)
	fmt.Println(parsedFloat) // 3.14159

	// Bool conversions
	fmt.Println(strconv.FormatBool(true)) // "true"
	fromWord, _ := strconv.ParseBool("true")
	fromDigit, _ := strconv.ParseBool("1")
	_, err = strconv.ParseBool("yes") // error ("yes" is not supported)
	fmt.Println(fromWord, fromDigit, err)
}
Real-World Example 1: CSV Parsing
package main
import (
"fmt"
"strconv"
"strings"
)
// Student is one parsed CSV row. parseCSV fills the fields from the four
// comma-separated columns, in this order.
type Student struct {
Name string
Age int // parsed with strconv.Atoi
Score float64 // parsed with strconv.ParseFloat
Grade string
}
// parseCSV converts one "name, age, score, grade" line into a Student.
// Surrounding whitespace in every column is ignored. It returns an error
// when the line does not have exactly four comma-separated columns, or
// when the age or score column is not a valid number; the underlying
// strconv error is wrapped.
func parseCSV(line string) (Student, error) {
	fields := strings.Split(line, ",")
	if got := len(fields); got != 4 {
		return Student{}, fmt.Errorf("invalid CSV format: %q (field count: %d)", line, got)
	}

	// Normalise every column once up front.
	for i := range fields {
		fields[i] = strings.TrimSpace(fields[i])
	}

	age, err := strconv.Atoi(fields[1])
	if err != nil {
		return Student{}, fmt.Errorf("failed to parse age: %w", err)
	}

	score, err := strconv.ParseFloat(fields[2], 64)
	if err != nil {
		return Student{}, fmt.Errorf("failed to parse score: %w", err)
	}

	return Student{
		Name:  fields[0],
		Age:   age,
		Score: score,
		Grade: fields[3],
	}, nil
}
// main parses an inline CSV sample line by line, reports the lines that
// fail to parse, lists the students that did parse, and prints their
// average score.
func main() {
	// The raw string keeps its lines at column 0 so no indentation leaks
	// into the data.
	csvData := `Alice, 20, 95.5, A
Bob, 22, 78.3, B
Charlie, 21, 88.0, B+
invalid line
Dave, 19, 92.7, A-`

	lines := strings.Split(csvData, "\n")
	var students []Student
	for i, line := range lines {
		student, err := parseCSV(line)
		if err != nil {
			// A bad row is reported and skipped; the rest are still processed.
			fmt.Printf("line %d parse error: %v\n", i+1, err)
			continue
		}
		students = append(students, student)
	}

	fmt.Println("\n=== Student List ===")
	total := 0.0
	for _, s := range students {
		fmt.Printf("%-10s age:%2d score:%5.1f grade:%s\n",
			s.Name, s.Age, s.Score, s.Grade)
		total += s.Score
	}

	// Without this guard an input with no valid rows would divide by zero
	// and print NaN.
	if len(students) == 0 {
		fmt.Println("\nAverage score: n/a (no valid rows)")
		return
	}
	fmt.Printf("\nAverage score: %.2f\n", total/float64(len(students)))
}
Real-World Example 2: Unicode String Processing
package main
import (
"fmt"
"unicode/utf8"
)
// Reverse a string character by character
// reverseString returns s with its runes (not its bytes) in reverse order,
// so multi-byte characters stay intact.
func reverseString(s string) string {
	chars := []rune(s)
	last := len(chars) - 1
	// Swap mirrored positions, walking only the first half.
	for lo := 0; lo < len(chars)/2; lo++ {
		hi := last - lo
		chars[lo], chars[hi] = chars[hi], chars[lo]
	}
	return string(chars)
}
// Return the nth character (rune) in a string
// charAt returns the rune at zero-based character position n of s.
// It returns an error when n is negative or not less than the number of
// runes in s.
func charAt(s string, n int) (rune, error) {
	chars := []rune(s)
	if n >= 0 && n < len(chars) {
		return chars[n], nil
	}
	return 0, fmt.Errorf("index %d out of range (length: %d)", n, len(chars))
}
// Truncate a string to max characters (not bytes)
// truncate shortens s to at most maxChars characters (runes, not bytes).
// When s already fits it is returned unchanged; otherwise the first
// maxChars runes are kept and "..." is appended. A negative maxChars is
// treated as 0 instead of panicking on an invalid slice bound.
func truncate(s string, maxChars int) string {
	if maxChars < 0 {
		maxChars = 0
	}
	runes := []rune(s)
	if len(runes) <= maxChars {
		return s
	}
	return string(runes[:maxChars]) + "..."
}
func main() {
text := "Hello, Go Language!"
fmt.Printf("Original: %s\n", text)
fmt.Printf("Bytes: %d\n", len(text))
fmt.Printf("Characters: %d\n", utf8.RuneCountInString(text))
fmt.Printf("Reversed: %s\n", reverseString(text))
fmt.Printf("Truncated: %s\n", truncate(text, 8))
// Access 3rd character
r, err := charAt(text, 2)
if err != nil {
fmt.Println("error:", err)
} else {
fmt.Printf("3rd character: %c\n", r) // l
}
// Multi-language example
multiLang := []string{
"Hello", // English
"μλ
νμΈμ", // Korean
"γγγ«γ‘γ―", // Japanese
"δ½ ε₯½", // Chinese
"Ω
Ψ±ΨΨ¨Ψ§", // Arabic
}
fmt.Println("\n=== Multi-language character counts ===")
for _, s := range multiLang {
fmt.Printf("%-12s bytes: %2d, chars: %2d\n",
s, len(s), utf8.RuneCountInString(s))
}
}
Pro Tips
Tip 1: Always use strings.Builder for repeated string concatenation
Using += inside a loop has O(n²) time complexity. If you're concatenating more than ~10 strings, reach for strings.Builder.
// Bad: O(n²) — every += builds a new string from everything accumulated so far
result := ""
for _, item := range items {
result += item + ","
}
// Good: O(n) — pieces are appended to the builder and the string is produced once
var sb strings.Builder
for _, item := range items {
sb.WriteString(item)
sb.WriteByte(',')
}
result := sb.String()
Tip 2: Modify []byte, then convert once at the end
If you need to make repeated modifications to a string, convert to []byte first, make all changes, and then convert back to string once at the end.
// Convert once to a mutable byte slice.
b := []byte(original)
// multiple modifications to b
result := string(b) // single conversion at the end
Tip 3: strconv is faster than fmt.Sprintf for number-to-string conversions
For simple number-to-string conversions, strconv.Itoa and strconv.FormatFloat are significantly faster than fmt.Sprintf.
// Slower: goes through fmt's general-purpose formatting machinery
s := fmt.Sprintf("%d", n)
// Faster: dedicated int-to-string conversion
s := strconv.Itoa(n)
Tip 4: Use strings.EqualFold for case-insensitive string comparison
// Inefficient: two allocations (one lowered copy per operand)
strings.ToLower(a) == strings.ToLower(b)
// Recommended: single pass, Unicode-aware, no intermediate strings
strings.EqualFold(a, b)
Tip 5: Use strings.ContainsAny to check for character membership
// Does the string contain any digit? (true if at least one listed character occurs in s)
hasDigit := strings.ContainsAny(s, "0123456789")
// Does the string contain any special character?
hasSpecial := strings.ContainsAny(s, "!@#$%^&*()")