go-reloaded/transform.go at main · teeschima/go-reloaded · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
package main

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"unicode"
)

// Transform applies every text correction to text and returns the result.
//
// The stages run in a fixed order, each feeding the next:
//
//  1. split the input on whitespace and resolve inline commands
//     (applyCommands),
//  2. rejoin the words with single spaces,
//  3. normalise spacing around punctuation (fixPunctuation),
//  4. tighten whitespace inside single quotes (fixQuotes),
//  5. turn "a" into "an" where required (fixAtoAn).
//
// Because the input is split with strings.Fields and rejoined with a single
// space, every run of whitespace in the input collapses to one space.
func Transform(text string) string {
	// Commands operate on whole words, so they are resolved before any of
	// the string-level passes run.
	tokens := applyCommands(strings.Fields(text))
	joined := strings.Join(tokens, " ")

	// Punctuation first, then quotes, then articles.
	return fixAtoAn(fixQuotes(fixPunctuation(joined)))
}

// applyCommands resolves the inline commands found in words and returns the
// resulting word list with the consumed command tokens removed.
//
// Two families of commands are recognised:
//
// Case commands, (up), (low) and (cap), optionally carrying a count as in
// (up, 3), rewrite words that were already emitted before the command. With
// no count, or a count of 0, only the single preceding word is affected. A
// count larger than the number of emitted words affects all of them.
//
// Base commands, (hex) and (bin), replace the word immediately before them
// with its decimal value. If that word does not parse as a number in the
// given base, both the word and the command token are kept as ordinary words.
//
// Callers split the text on whitespace, so a counted command written with a
// space, such as "(up, 3)", arrives as the two tokens "(up," and "3)". Those
// two tokens are rejoined here before being handed to parseCmd.
func applyCommands(words []string) []string {
	out := make([]string, 0, len(words))

	for i := 0; i < len(words); {
		w := words[i]

		// Try the token on its own first; if that fails and it looks like
		// the first half of a split counted command, retry with the next
		// token appended.
		cmd, n, ok := parseCmd(w)
		consumed := 1
		if !ok && i+1 < len(words) && strings.HasPrefix(w, "(") && strings.HasSuffix(w, ",") {
			if cmd, n, ok = parseCmd(w + " " + words[i+1]); ok {
				consumed = 2
			}
		}

		if ok {
			count := 1
			if n > 0 {
				count = n
			}

			// Clamp the start so an oversized count costs no more than one
			// pass over the words emitted so far.
			start := len(out) - count
			if start < 0 {
				start = 0
			}
			for j := start; j < len(out); j++ {
				out[j] = applyCase(out[j], cmd)
			}

			i += consumed
			continue
		}

		// (hex) and (bin) act on the current word, so they are detected by
		// peeking at the following token.
		if i+1 < len(words) {
			base := 0
			switch words[i+1] {
			case "(hex)":
				base = 16
			case "(bin)":
				base = 2
			}
			if base != 0 {
				if val, err := strconv.ParseInt(w, base, 64); err == nil {
					out = append(out, fmt.Sprintf("%d", val))
					i += 2 // skip the number and the command token
					continue
				}
				// Not a valid number in that base: fall through and keep
				// the word unchanged; the command token is emitted as a
				// plain word on the next iteration.
			}
		}

		out = append(out, w)
		i++
	}

	return out
}

// caseCmdPattern matches a complete case-command token: "(up)", "(low)" or
// "(cap)", optionally followed by a comma, optional whitespace and a decimal
// count, e.g. "(up,3)" or "(cap, 10)". Capture group 1 is the command name and
// group 2 holds the count digits (empty when no count was given).
//
// It is compiled once at package level so parseCmd does not recompile it on
// every call.
var caseCmdPattern = regexp.MustCompile(`^\((up|low|cap)(?:,\s*(\d+))?\)$`)

// parseCmd reports whether s is a case command and, if so, decodes it.
//
// On a match it returns the command name ("up", "low" or "cap"), the count
// that followed the comma (0 when no count was written) and ok == true.
// For anything else it returns "", 0, false. A count whose digits do not fit
// in an int is treated as "not a command" rather than being passed on as a
// silently clamped value.
func parseCmd(s string) (cmd string, n int, ok bool) {
	m := caseCmdPattern.FindStringSubmatch(s)
	if m == nil {
		return "", 0, false
	}

	if m[2] != "" {
		// The pattern guarantees m[2] is all digits, so the only way Atoi
		// can fail here is by overflowing int.
		count, err := strconv.Atoi(m[2])
		if err != nil {
			return "", 0, false
		}
		n = count
	}

	return m[1], n, true
}

// applyCase returns word with the casing requested by cmd applied:
//
//	"up"  -> every letter upper-cased          ("hello" -> "HELLO")
//	"low" -> every letter lower-cased          ("HELLO" -> "hello")
//	"cap" -> first rune upper, rest lower-case ("hELLO" -> "Hello")
//
// Any other cmd, and an empty word under "cap", yield word unchanged.
func applyCase(word, cmd string) string {
	switch cmd {
	case "up":
		return strings.ToUpper(word)
	case "low":
		return strings.ToLower(word)
	case "cap":
		// Work on runes so a multi-byte first character is handled as one
		// unit instead of being cut in the middle.
		runes := []rune(word)
		if len(runes) == 0 {
			return word
		}
		head := string(unicode.ToUpper(runes[0]))
		tail := strings.ToLower(string(runes[1:]))
		return head + tail
	default:
		return word
	}
}

// Patterns used by fixPunctuation, compiled once at package level instead of
// on every call.
var (
	// spaceBeforePunct matches one or more spaces followed by a run of
	// punctuation marks; the punctuation run is capture group 1.
	spaceBeforePunct = regexp.MustCompile(` +([.,!?:;]+)`)

	// punctBeforeText matches a run of punctuation (group 1) immediately
	// followed by a character (group 2) that is not whitespace, punctuation
	// or a quote mark.
	punctBeforeText = regexp.MustCompile(`([.,!?:;]+)([^\s.,!?:;'"])`)
)

// fixPunctuation normalises the spacing around the marks . , ! ? : ; in text.
//
// Rule 1: spaces directly before a punctuation run are removed, so
// "hello ." becomes "hello." and "what !?" becomes "what!?".
//
// Rule 2: a punctuation run directly followed by ordinary text gets one space
// inserted after it, so "hello,world" becomes "hello, world". Nothing is
// inserted when the next character is whitespace, more punctuation, or a
// single or double quote.
//
// The result is finally trimmed of leading and trailing whitespace.
func fixPunctuation(text string) string {
	text = spaceBeforePunct.ReplaceAllString(text, "$1")
	text = punctBeforeText.ReplaceAllString(text, "$1 $2")
	return strings.TrimSpace(text)
}

// paddedQuote matches a single-quoted span that has whitespace on BOTH inner
// sides: an opening ', one or more whitespace characters, the shortest
// possible non-empty content (capture group 1), one or more whitespace
// characters, and a closing '. Compiled once at package level instead of on
// every fixQuotes call.
var paddedQuote = regexp.MustCompile(`'\s+(.+?)\s+'`)

// fixQuotes removes the padding inside single-quoted spans, so that
// "' hello world '" becomes "'hello world'".
//
// Only spans padded on both sides are rewritten; a span with whitespace on
// just one side (for example "' hello'") is returned unchanged.
func fixQuotes(text string) string {
	return paddedQuote.ReplaceAllString(text, "'$1'")
}

// fixAtoAn rewrites the article "a" as "an" (and "A" as "An") whenever the
// word that follows it begins with a, e, i, o, u or h, compared
// case-insensitively.
//
//	"a apple" -> "an apple"
//	"A owl"   -> "An owl"
//	"a hero"  -> "an hero"
//
// The rule is purely letter-based: it does not know about pronunciation, so
// every h-word gets "an". Only a stand-alone "a"/"A" word is considered, and
// the last word of the text is never changed because nothing follows it.
//
// The text is split on whitespace and rejoined with single spaces, so runs of
// whitespace are collapsed in the result.
func fixAtoAn(text string) string {
	tokens := strings.Fields(text)

	// Walk the words looking backwards: tokens[idx] is the candidate
	// "following word" and tokens[idx-1] the possible article.
	for idx := 1; idx < len(tokens); idx++ {
		article := tokens[idx-1]
		if article != "a" && article != "A" {
			continue
		}

		// strings.Fields never yields empty strings, so the first rune of
		// the following word always exists.
		switch unicode.ToLower([]rune(tokens[idx])[0]) {
		case 'a', 'e', 'i', 'o', 'u', 'h':
			// Appending "n" keeps the article's original capitalisation.
			tokens[idx-1] = article + "n"
		}
	}

	return strings.Join(tokens, " ")
}