Understanding JWT

Understanding JWT#

This is a little script to help understand JSON Web Token (JWT). Also see:

# SPDX-FileCopyrightText: 2021-2023 Univention GmbH
#
# SPDX-License-Identifier: AGPL-3.0-only

# This short description is adapted from https://auth0.com/blog/how-to-handle-jwt-in-python/

import base64
from pprint import pprint

# pip install pyjwt[crypto]
import jwt
from cryptography.hazmat.primitives import serialization

from jwt_secrets import PRIVATE, PUBLIC, shared
from recursive_jwt import recursive_jwt
from icecream import ic

# A JWT is built around a payload: an object with arbitrary keys and values.
# Some keys have a predefined meaning, see
# https://datatracker.ietf.org/doc/html/rfc7519#section-4.1
# or the shorter table at https://www.iana.org/assignments/jwt/jwt.xhtml

payload1 = {'sub': 'the subject', 'foo': 'bar'}

# The payload is encoded and signed with a key: either a shared secret or the
# secret half of an asymmetric key pair. No algorithm is named in this call,
# so the library picks its default.

token1 = jwt.encode(payload=payload1, key=shared)

# Let's have a look at the token...
ic(token1)  # eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ0aGUgc3ViamVjdCIsImZvbyI6ImJhciJ9.Dw6fFM
# -Jk7oP1g7Iu9FKl9cO5PEF7ITXEzRqWaD3sIo

# It consists of three parts joined by dots: header, payload, signature.
# Each part is base64url encoded (RFC 7515, section 2): the URL-safe alphabet
# ('-' and '_' instead of '+' and '/') with the trailing '=' padding removed.

header, payload, signature = token1.split('.')


def _b64url_decode(segment):
    """Decode one unpadded base64url JWT segment to bytes.

    The stripped '=' padding is restored first, because the decoder only
    accepts input whose length is a multiple of four. The URL-safe decoder
    is required: plain ``base64.b64decode`` silently discards '-' and '_'
    (they are not in the standard alphabet) and returns wrong bytes.
    """
    return base64.urlsafe_b64decode(segment + '=' * (-len(segment) % 4))


ic(_b64url_decode(header))  # header b'{"typ":"JWT","alg":"HS256"}'
ic(_b64url_decode(payload))  # payload b'{"sub":"the subject","foo":"bar"}'
# The signature is not JSON but raw bytes: the 43 characters shown above
# decode to 32 bytes, starting with b'\x0f\x0e\x9f\x14\xcf\x89'.
ic(_b64url_decode(signature))


# The library can hand us the header without verifying anything. No key is
# needed for that, but by the same token the header cannot be trusted.

univerified_header = jwt.get_unverified_header(token1)
ic(univerified_header)  # {'typ': 'JWT', 'alg': 'HS256'}


# Decoding the token (a.k.a. getting the payload) does not work without a key:
# what use is the payload if its signature has not been checked?
# NOTE: the algorithm list is taken from the token itself here. Don't do this
# in real code; pin the algorithms you actually accept.
claimed_algorithms = [univerified_header['alg']]
try:
    jwt.decode(token1, algorithms=claimed_algorithms)
except jwt.exceptions.InvalidSignatureError:
    ic('no secret, no verifification, no decoding')

# So we pass the key, and pin the one algorithm we accept.
payload_decoded = jwt.decode(token1, key=shared, algorithms=["HS256"])

# Now we hold a verified payload.
ic(payload_decoded)  # {'sub': 'the subject', 'foo': 'bar'}

# Strictly speaking the payload *can* be read without a key: switch off
# signature verification through the options and leave the key out.
payload_unverified = jwt.decode(
    token1,
    # no key
    algorithms=[univerified_header['alg']],  # algorithm read from the header: unverified!
    options=dict(verify_signature=False),
)

# Nobody tampered with the payload in this case, so both views agree.
assert payload_decoded == payload_unverified

# The same again, this time with an asymmetric key pair.


# A key pair was created using ssh and the contents of both key files are kept
# in a separate module. They have to be loaded into key objects before use.
# The private-key loader insists on a password argument, so the empty
# password is passed explicitly.
public = serialization.load_ssh_public_key(PUBLIC.encode())
private = serialization.load_ssh_private_key(PRIVATE.encode(), password=b'')

# Encoding works as before, except that the private key signs and the
# algorithm is named explicitly.
token2 = jwt.encode(payload=payload1, key=private, algorithm='RS256')

# Decoding works as before too; the public key is enough to verify.
payload2_decoded = jwt.decode(token2, key=public, algorithms=['RS256'])
# Section banner: blank line, rule of 40 '=', blank line, heading.
print('', '=' * 40, '', 'verification', sep='\n')

# Now we want to verify an access token using the public key from Keycloak.
# We get the key from http://10.200.41.167:8080/auth/realms/bettermarks, but
# in the form shown there it is not directly usable.
#
# access_token is a recorded sample token, written as two adjacent string
# literals (header, then ".payload.signature") that Python joins into one.
access_token = \
    "eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJMWVFSbjgxcXFCS196anVIcmZWcXU0QjZKejQweUlob1VtaGlmOWJPZDVzIn0" \
    ".eyJleHAiOjE2MjU3Mzk3ODgsImlhdCI6MTYyNTczOTQ4OCwiYXV0aF90aW1lIjoxNjI1NzM5NDg4LCJqdGkiOiJlNjY1NTY5OC05YzkyLTQ3M2UtYjk1Yy1kZmUyNWE2OGE5YWQiLCJpc3MiOiJodHRwOi8vMTAuMjAwLjQxLjE2Nzo4MDgwL2F1dGgvcmVhbG1zL2JldHRlcm1hcmtzIiwiYXVkIjoiYWNjb3VudCIsInN1YiI6ImY6YWFhMTIzNjMtNGE3YS00YTNkLThiZDEtYTUxNzAwYzY3MTc0OmZvbyIsInR5cCI6IkJlYXJlciIsImF6cCI6InB5dGhvbi1jbGllbnQtdGVzdCgxMC4yMDAuNDEuMTY0KSIsInNlc3Npb25fc3RhdGUiOiJhMGY4MmFiMy05NzMzLTQ1OGItOGM4ZS0zYzAxNTQzNmYyMjUiLCJhY3IiOiIxIiwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbImRlZmF1bHQtcm9sZXMtYmV0dGVybWFya3MiLCJvZmZsaW5lX2FjY2VzcyIsInVtYV9hdXRob3JpemF0aW9uIl19LCJyZXNvdXJjZV9hY2Nlc3MiOnsiYWNjb3VudCI6eyJyb2xlcyI6WyJtYW5hZ2UtYWNjb3VudCIsIm1hbmFnZS1hY2NvdW50LWxpbmtzIiwidmlldy1wcm9maWxlIl19fSwic2NvcGUiOiJvcGVuaWQgYmV0dGVybWFya3NJRCBwcm9maWxlIGVtYWlsIiwiZW1haWxfdmVyaWZpZWQiOmZhbHNlLCJuYW1lIjoiZm9vIGZvbyBmb28iLCJwcmVmZXJyZWRfdXNlcm5hbWUiOiJmb28iLCJnaXZlbl9uYW1lIjoiZm9vIGZvbyIsImZhbWlseV9uYW1lIjoiZm9vIiwiZW1haWwiOiJmb283ODlAZXhhbXBsZS5jb20ifQ.VzwIj4uivol1fv1iO4JdVIYBeuHLqSuA60EoQ-Ntd0apZ0vsLKHs-fOEAhLcLU6yNci990ac40C0ZW1-20DBV2vqzNxUXlC7e9eXY9Fx1b32kQZCNZvcEdNQj7j1dAu0hnxycj9fXlFkmXsD0W_DcdALvjRBcKTLBM_JQDUKpqUrwbUVgXxw0YFCUtY5jLCPEHHm8lUC3OFWVVjOHUJ08xTEwn5PXrhEfDaeXBbXHfKEkUNK-AibrZ1Mm3-hFUV6pNzq97aoaGCvH_-BIlJeRr5VGKP5XIay939vXpM_SLEzsWNgvESow0Rgp-ld4wT-q3ygCmF93QqxU2axedg2gQ"
# The public key exactly as obtained: only the base64 body, without the
# "-----BEGIN/END PUBLIC KEY-----" lines. Adjacent literals are concatenated
# into a single line-break-free string.
keycloak_public_key = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAjxldobTiWegAsYhbVT+MdjNNCffFKkih1xt1nKaR" \
                      "+FGCOXV9zYpGnsBD+KCCG6atNTYgL8RDPsuYY/XdcmqpoYL4cXFJQVtyXUPEi38/AQ24uUgTifQURC1N1d5eGGPoJE4r9M" \
                      "/dGLmCsHTyVagXDQw1WG4Lbx/UNkDM72uk+rw9Phc+VZHvMXHcDpsIIc3fhf6g2Je4PLYu4" \
                      "/GPm86NCPyQlBNzYnmebxxzmAyLg9EI1YFhD2eP1paLag+difvF098a7ed+8sHnJUsLrn94E8RY28Oe5lpDh5g0wl" \
                      "+ph9AK5FIrWYw0HIYaHjq2S2rrUQeBynb2OwfokoKqLqmcrQIDAQAB"

# The bare base64 body has to be wrapped in the PEM header and footer lines
# before the library accepts it as a public key.
keycloak_with_headers = "\n".join((
    "-----BEGIN PUBLIC KEY-----",
    keycloak_public_key,
    "-----END PUBLIC KEY-----",
))

# audience: the sample token carries the claim "aud": "account"; the library
# checks that claim against the value passed here.
# verify_exp is switched off, presumably because this recorded sample token
# expired long ago.
verified_token = jwt.decode(
    access_token,
    key=keycloak_with_headers,
    audience='account',
    algorithms=['RS256'],
    options={'verify_exp': False},
)

# The signature checked out, so this payload is verified.
ic(verified_token)
print()

# Finally, a small helper for looking at the complete token. It recursively
# tries to decode whatever it finds. jwt.io or a similar tool does the job too.
# Section banner first: rule of 40 '=', blank line, heading.
print('=' * 40, '', 'Access token', sep='\n')
ic(recursive_jwt(access_token))