URL Parsing in C

Here’s the translation of the URL parsing example from Go to C:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Components of a parsed URL.
//
// Every non-NULL member is an individually heap-allocated, NUL-terminated
// string owned by the struct; release the whole thing with free_url().
// A member is NULL when the corresponding component is absent from the
// input (host is the exception: it is always set, possibly to "").
typedef struct {
    char *scheme;    // text before "://", NULL if the URL has no scheme
    char *user;      // userinfo before the first ':', NULL if there is no '@'
    char *password;  // userinfo after the first ':', NULL if not given
    char *host;      // host name; a bracketed IPv6 literal keeps its brackets
    char *port;      // text after the host's ':', NULL if not given
    char *path;      // path WITHOUT its leading '/', NULL if there is no '/'
    char *query;     // text between '?' and '#', NULL if there is no '?'
    char *fragment;  // text after '#', NULL if there is no '#'
} URL;

// Releases a URL returned by parse_url() together with all of its component
// strings. Passing NULL is a no-op.
void free_url(URL *url) {
    if (!url) {
        return;
    }
    free(url->scheme);
    free(url->user);
    free(url->password);
    free(url->host);
    free(url->port);
    free(url->path);
    free(url->query);
    free(url->fragment);
    free(url);
}

// Returns a newly allocated, NUL-terminated copy of the len bytes starting
// at start, or NULL if the allocation fails. Written with malloc/memcpy so
// the parser depends only on ISO C rather than on POSIX strndup().
static char *url_dup_range(const char *start, size_t len) {
    char *copy = malloc(len + 1);
    if (copy) {
        memcpy(copy, start, len);
        copy[len] = '\0';
    }
    return copy;
}

// Parses url_string of the form
//
//     [scheme://][user[:password]@]host[:port][/path][?query][#fragment]
//
// into a freshly allocated URL. No validation or percent-decoding is done;
// the components are copied verbatim.
//
// Returns NULL if url_string is NULL or if any allocation fails (nothing is
// leaked in that case). The caller owns the result and must pass it to
// free_url().
URL *parse_url(const char *url_string) {
    if (!url_string) {
        return NULL;
    }

    // calloc leaves every component NULL, so free_url() is safe on a
    // partially filled struct if we bail out below.
    URL *url = calloc(1, sizeof *url);
    if (!url) {
        return NULL;
    }

    const char *cursor = url_string;

    // Scheme: "://" only counts when its '/' is the first '/', '?' or '#'
    // in the input, so a "://" buried in a path or query is not mistaken
    // for the scheme separator.
    const char *scheme_end = strstr(cursor, "://");
    if (scheme_end &&
        strcspn(cursor, "/?#") == (size_t)(scheme_end - cursor) + 1) {
        url->scheme = url_dup_range(cursor, (size_t)(scheme_end - cursor));
        if (!url->scheme) {
            goto fail;
        }
        cursor = scheme_end + 3;
    }

    // The authority (userinfo, host, port) ends at the first '/', '?' or
    // '#', or at the end of the string.
    const char *auth_end = cursor + strcspn(cursor, "/?#");

    // Userinfo: split at the last '@' inside the authority, so an '@' in the
    // path or query is never treated as a credentials separator.
    const char *at_sign = NULL;
    for (const char *p = cursor; p < auth_end; p++) {
        if (*p == '@') {
            at_sign = p;
        }
    }
    if (at_sign) {
        size_t info_len = (size_t)(at_sign - cursor);
        const char *info_colon = memchr(cursor, ':', info_len);
        if (info_colon) {
            url->user = url_dup_range(cursor, (size_t)(info_colon - cursor));
            url->password = url_dup_range(info_colon + 1,
                                          (size_t)(at_sign - info_colon - 1));
            if (!url->user || !url->password) {
                goto fail;
            }
        } else {
            url->user = url_dup_range(cursor, info_len);
            if (!url->user) {
                goto fail;
            }
        }
        cursor = at_sign + 1;
    }

    // Host and port. For a bracketed IPv6 literal such as "[::1]:8080" the
    // search for the port colon starts after the closing bracket.
    const char *host_start = cursor;
    const char *scan = host_start;
    if (scan < auth_end && *scan == '[') {
        const char *close = memchr(scan, ']', (size_t)(auth_end - scan));
        if (close) {
            scan = close + 1;
        }
    }
    const char *port_colon = memchr(scan, ':', (size_t)(auth_end - scan));
    if (port_colon) {
        url->host = url_dup_range(host_start,
                                  (size_t)(port_colon - host_start));
        url->port = url_dup_range(port_colon + 1,
                                  (size_t)(auth_end - port_colon - 1));
        if (!url->host || !url->port) {
            goto fail;
        }
    } else {
        url->host = url_dup_range(host_start, (size_t)(auth_end - host_start));
        if (!url->host) {
            goto fail;
        }
    }

    // Path: everything after the leading '/' up to '?' or '#'. The slash
    // itself is not stored.
    cursor = auth_end;
    if (*cursor == '/') {
        cursor++;
        size_t path_len = strcspn(cursor, "?#");
        url->path = url_dup_range(cursor, path_len);
        if (!url->path) {
            goto fail;
        }
        cursor += path_len;
    }

    // Query: between '?' and '#'.
    if (*cursor == '?') {
        cursor++;
        size_t query_len = strcspn(cursor, "#");
        url->query = url_dup_range(cursor, query_len);
        if (!url->query) {
            goto fail;
        }
        cursor += query_len;
    }

    // Fragment: everything after '#', including any later '?' or '#'.
    if (*cursor == '#') {
        cursor++;
        url->fragment = url_dup_range(cursor, strlen(cursor));
        if (!url->fragment) {
            goto fail;
        }
    }

    return url;

fail:
    free_url(url);
    return NULL;
}

int main() {
    // Sample input that exercises every field of the URL struct:
    // scheme, user, password, host, port, path, query and fragment.
    const char* input = "postgres://user:pass@host.com:5432/path?k=v#f";

    // Run the parser; a NULL result is reported as a failure.
    URL* parsed = parse_url(input);
    if (parsed == NULL) {
        fprintf(stderr, "Failed to parse URL\n");
        return 1;
    }

    // The scheme is the part in front of "://".
    printf("Scheme: %s\n", parsed->scheme);

    // Credentials are kept in two separate fields.
    printf("User: %s\n", parsed->user);
    printf("Password: %s\n", parsed->password);

    // Unlike Go's combined Host value, the host name and the
    // port are stored in separate fields of this struct.
    printf("Host: %s\n", parsed->host);
    printf("Port: %s\n", parsed->port);

    // The path, followed by the fragment that comes after '#'.
    printf("Path: %s\n", parsed->path);
    printf("Fragment: %s\n", parsed->fragment);

    // The query is kept as one raw "k=v" style string; split
    // it further yourself if individual parameters are needed.
    printf("Query: %s\n", parsed->query);

    // Hand the struct back to free_url once we are done with it.
    free_url(parsed);

    return 0;
}

This C code provides a basic implementation of URL parsing. Here’s an explanation of the key points:

  1. We define a URL struct to hold the different components of a URL.

  2. The parse_url function takes a URL string and parses it into the URL struct. This function uses string manipulation functions from the C standard library to extract different parts of the URL.

  3. In the main function, we create an example URL string and parse it using our parse_url function.

  4. We then print out the different components of the parsed URL.

  5. Finally, we call free_url to release the URL struct together with every component string it holds.

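Note that this implementation is basic and may not handle all edge cases or comply with the full URL specification. For instance, it performs no percent-decoding, and it stores the path without its leading slash, which is why the output below shows `path` where Go's net/url would report `/path`. In a production environment, you might want to use a more robust, well-tested library for URL parsing.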

To compile and run this program:

$ gcc url_parsing.c -o url_parsing
$ ./url_parsing
Scheme: postgres
User: user
Password: pass
Host: host.com
Port: 5432
Path: path
Fragment: f
Query: k=v

This program demonstrates basic URL parsing in C, showing how to extract different components of a URL string.