# Changelog: Replace 6 compound Likert questions with 12 atomic ones grouped by dimension (syntax, expressiveness, data/IO, errors, overall); drop free-form question. Responses now stored as ints, not strings. Back-compat layer maps legacy keys to new dimensions so existing results still render. Parallelize run-all with ThreadPoolExecutor (configurable workers) and add a thread-safe min-request-interval rate limiter to the Anthropic provider. Add new tasks: path_normalizer, todo_manager, currency_converter, locale_weather_url, network_info_parser, url_normalizer.
# 80 lines · 2.0 KiB · TOML
name = "url_normalizer"
category = "pipeline"
mode = "convert"
description = """
Read URLs from stdin, one per line. Normalize each URL:
1. If the URL already starts with "https://", keep it as-is.
2. If it starts with "http://", keep it as-is.
3. Otherwise, prepend "http://" to it.
4. After normalization, validate that the URL matches a basic pattern:
   it must have a protocol (http:// or https://), followed by at least
   one character, a dot, and at least one more character for the domain.
5. Output the normalized URL, or "INVALID: <original>" for invalid entries.

Skip empty lines silently.
"""

bash_source = '''
#!/bin/bash
# Read URLs from stdin, one per line.  Each non-empty line is trimmed,
# given an "http://" prefix when it carries no protocol, validated against
# a basic <protocol>://<something>.<something> pattern, and echoed back —
# either normalized or as "INVALID: <original>".

#######################################
# Normalize and validate a single URL.
# Arguments: $1 - a whitespace-trimmed, non-empty URL candidate
# Outputs:   the normalized URL, or "INVALID: <original>" when the result
#            lacks a <chars>.<chars> host after the protocol
# Returns:   0 always
#######################################
normalize_url() {
  local url=$1
  local normalized

  # Keep an existing protocol; otherwise default to plain http.
  # (Bash prefix patterns replace the per-line `echo | cut` forks.)
  if [[ "$url" == https://* || "$url" == http://* ]]; then
    normalized=$url
  else
    normalized="http://$url"
  fi

  # Validate: protocol + something.something (same ERE the original
  # passed to grep, evaluated by [[ =~ ]] without a fork).
  local re='^https?://[^/]+\.[^/]+'
  if [[ "$normalized" =~ $re ]]; then
    printf '%s\n' "$normalized"
  else
    printf 'INVALID: %s\n' "$url"
  fi
}

# Main loop: `|| [[ -n "$line" ]]` also processes a final line that is
# missing its trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Trim leading/trailing whitespace via parameter expansion instead of
  # spawning `echo | sed` for every input line.
  url=${line#"${line%%[![:space:]]*}"}
  url=${url%"${url##*[![:space:]]}"}

  # Skip empty (or all-whitespace) lines silently.
  [[ -z "$url" ]] && continue

  normalize_url "$url"
done
'''

[[test_cases]]
description = "URLs with and without protocol"
stdin = """example.com
http://example.com
https://example.com
www.google.com/search?q=test"""
expected_stdout = """http://example.com
http://example.com
https://example.com
http://www.google.com/search?q=test"""

[[test_cases]]
description = "Invalid entries"
stdin = """notaurl
https://valid.example.com
just-a-word"""
expected_stdout = """INVALID: notaurl
https://valid.example.com
INVALID: just-a-word"""

[[test_cases]]
description = "Mixed valid and empty lines"
stdin = """https://secure.site.org/path

api.service.io:8080
http://old.site.net"""
expected_stdout = """https://secure.site.org/path
http://api.service.io:8080
http://old.site.net"""