Weird Fiction

Code for SVG // svg.html



<script> // lovecraft data from adjFILTERED.tsv 

// Bubble data for the Lovecraft side of the chart.
// Each entry is { word, synset }; draw() uses `synset` directly as the
// circle radius and shows it in the tooltip. Entries are not de-duplicated
// (e.g. "good", "heavy" and "wild" each appear more than once), and every
// entry is rendered as its own bubble.
const lovecraftData = [ 

    { word: "wild", synset: 17 },

    { word: "deep", synset: 21 },

    { word: "dark", synset: 16 },

    { word: "low", synset: 16 },

    { word: "good", synset: 27 },

    { word: "open", synset: 36 },

    { word: "blue", synset: 16 },

    { word: "big", synset: 17 },

    { word: "soft", synset: 20 },

    { word: "bad", synset: 17 },

    { word: "last", synset: 21 },

    { word: "dead", synset: 21 },

    { word: "black", synset: 22 },

    { word: "white", synset: 25 },

    { word: "hot", synset: 21 },

    { word: "fit", synset: 16 },

    { word: "cold", synset: 16 },

    { word: "right", synset: 36 },

    { word: "short", synset: 23 },

    { word: "heavy", synset: 30 },

    { word: "well", synset: 22 },

    { word: "good", synset: 27 },

    { word: "high", synset: 18 },

    { word: "dry", synset: 19 },

    { word: "live", synset: 19 },

    { word: "solid", synset: 18 },

    { word: "clear", synset: 45 },

    { word: "close", synset: 37 },

    { word: "second", synset: 15 },

    { word: "present", synset: 18 },

    { word: "heavy", synset: 30 },

    { word: "advanced", synset: 20 },

    { word: "back", synset: 28 },

    { word: "true", synset: 15 },

    { word: "dull", synset: 19 },

    { word: "sharp", synset: 15 },

    { word: "standing", synset: 21 },

    { word: "last", synset: 21 },

    { word: "foul", synset: 16 },

    { word: "regular", synset: 17 },

    { word: "closing", synset: 23 },

    { word: "darker", synset: 45 },

    { word: "wild", synset: 17 },

    { word: "light", synset: 47 },

    { word: "short", synset: 23 },

    { word: "rough", synset: 18 },

    { word: "first", synset: 16 },

    { word: "loose", synset: 18 },

    { word: "separate", synset: 19 },

    { word: "upset", synset: 17 },

    { word: "closed", synset: 26 },

    { word: "gray", synset: 15 },

    { word: "dull", synset: 19 },

    { word: "blue", synset: 16 },

    { word: "flat", synset: 24 },

    { word: "fair", synset: 17 },

    { word: "frozen", synset: 17 },

    { word: "top", synset: 22 },

    { word: "hard", synset: 22 },

    { word: "soft", synset: 20 },

    { word: "left", synset: 24 },

    { word: "right", synset: 36 },

    { word: "sound", synset: 24 },

    { word: "free", synset: 22 },

    { word: "crossing", synset: 15 },



];

// Bubble data for the Poe side of the chart, taken from adjFILTERED.tsv.
// Each entry is { word, synset }; draw() uses `synset` directly as the
// circle radius and shows it in the tooltip. Entries are not de-duplicated
// (e.g. "true", "last" and "free" each appear more than once), and every
// entry is rendered as its own bubble.

const poeData = [

    { word: "dark", synset: 16 },

    { word: "open", synset: 36 },

    { word: "well", synset: 22 },

    { word: "true", synset: 15 },

    { word: "good", synset: 27 },

    { word: "high", synset: 18 },

    { word: "last", synset: 21 },

    { word: "close", synset: 37 },

    { word: "clear", synset: 45 },

    { word: "first", synset: 16 },

    { word: "present", synset: 18 },

    { word: "square", synset: 25 },
    { word: "puff", synset: 17 },

    { word: "deep", synset: 21 },

    { word: "right", synset: 36 },

    { word: "closed", synset: 26 },

    { word: "second", synset: 15 },

    { word: "short", synset: 23 },

    { word: "true", synset: 15 },

    { word: "blue", synset: 16 },

    { word: "broken", synset: 22 },

    { word: "directed", synset: 15 },

    { word: "wild", synset: 17 },

    { word: "easy", synset: 15 },

    { word: "fair", synset: 17 },

    { word: "black", synset: 22 },

    { word: "high", synset: 18 },

    { word: "sound", synset: 24 },

    { word: "regular", synset: 17 },

    { word: "solid", synset: 18 },

    { word: "flat", synset: 24 },

    { word: "white", synset: 25 },

    { word: "fitting", synset: 15 },

    { word: "tight", synset: 16 },

    { word: "square", synset: 25 },

    { word: "heavy", synset: 30 },

    { word: "solid", synset: 18 },

    { word: "free", synset: 22 },

    { word: "sweet", synset: 16 },

    { word: "big", synset: 17 },

    { word: "soft", synset: 20 },

    { word: "hard", synset: 22 },

    { word: "separate", synset: 19 },

    { word: "direct", synset: 24 },

    { word: "double", synset: 21 },

    { word: "low", synset: 16 },

    { word: "dead", synset: 21 },

    { word: "light", synset: 47 },

    { word: "flush", synset: 18 },

    { word: "sharp", synset: 15 },

    { word: "round", synset: 25 },

    { word: "loose", synset: 18 },

    { word: "wild", synset: 17 },

    { word: "rough", synset: 18 },

    { word: "extended", synset: 22 },

    { word: "easy", synset: 15 },

    { word: "cold", synset: 16 },

    { word: "marked", synset: 18 },

    { word: "subject", synset: 15 },

    { word: "good", synset: 27 },

    { word: "clear", synset: 45 },

    { word: "dull", synset: 19 },

    { word: "gray", synset: 15 },

    { word: "last", synset: 21 },

    { word: "still", synset: 18 },

    { word: "striking", synset: 25 },

    { word: "straight", synset: 21 },

    { word: "frozen", synset: 17 },

    { word: "striped", synset: 15 },

    { word: "free", synset: 22 },

    { word: "direct", synset: 24 },

    { word: "last", synset: 21 },

    { word: "fast", synset: 15 },

    { word: "covered", synset: 27 },

    { word: "fair", synset: 17 },

    { word: "round", synset: 25 },

    { word: "hot", synset: 21 },

    { word: "fit", synset: 16 },

    { word: "wilder", synset: 15 },

    { word: "bad", synset: 17 },


];


// Distinct Lovecraft words that also occur somewhere in the Poe list,
// kept in the order of their first appearance in lovecraftData.
const sharedWords = [...new Set(lovecraftData.map(({ word }) => word))]
    .filter((candidate) => poeData.some(({ word }) => word === candidate));


// The <svg> element the bubbles are drawn into, and the button that
// switches between the two views.
const svg = document.getElementById("bubbleChart");
const toggleBtn = document.getElementById("toggleBtn");

// false: both authors' full lists are shown; true: only the shared words.
let showSharedOnly = false;

// Each click flips the view, relabels the button with the action the
// next click will perform, and redraws the chart.
toggleBtn.addEventListener("click", () => {
    showSharedOnly = !showSharedOnly;

    if (showSharedOnly) {
        toggleBtn.textContent = "Show All Words";
    } else {
        toggleBtn.textContent = "Show Shared Words Only";
    }

    renderBubbles(lovecraftData, poeData);
});



/**
 * Append one labelled bubble per entry of `data` to the chart's <svg>,
 * laid out left-to-right in rows of four.
 *
 * Also grows the shared `maxWidth` / `maxHeight` extents so the caller can
 * size the viewBox after drawing.
 *
 * @param {{word: string, synset: number}[]} data - Entries to draw; `synset` is used as the radius.
 * @param {number} offsetX - x coordinate of the first column's bubble centres.
 * @param {string} color1 - Fill colour of each circle.
 * @param {string} color2 - Stroke colour of each circle.
 */
function draw(data, offsetX, color1, color2) {
    const SVG_NS = "http://www.w3.org/2000/svg";
    const makeSvgNode = (tagName) => document.createElementNS(SVG_NS, tagName);
    const applyAttributes = (node, pairs) => {
        for (const [attrName, attrValue] of pairs) {
            node.setAttribute(attrName, attrValue);
        }
    };

    data.forEach((entry, position) => {
        const bubbleRadius = entry.synset;

        // Grid placement: columns are 150 apart, rows 130 apart, first row at y = 100.
        const centerX = offsetX + (position % 4) * 150;
        const centerY = 100 + Math.floor(position / 4) * 130;

        const wrapper = makeSvgNode("g");
        applyAttributes(wrapper, [["class", "bubble"]]);

        const disc = makeSvgNode("circle");
        applyAttributes(disc, [
            ["cx", centerX],
            ["cy", centerY],
            ["r", bubbleRadius],
            ["fill", color1],
            ["stroke", color2],
            ["stroke-width", "2"],
        ]);

        // The <title> is a child of the circle itself, not of the group.
        const tooltip = makeSvgNode("title");
        tooltip.textContent = `${entry.word} (Synset: ${entry.synset})`;
        disc.appendChild(tooltip);

        // The label is nudged 4 units below the centre line.
        const label = makeSvgNode("text");
        applyAttributes(label, [
            ["x", centerX],
            ["y", centerY + 4],
            ["text-anchor", "middle"],
        ]);
        label.textContent = entry.word;

        wrapper.appendChild(disc);
        wrapper.appendChild(label);
        svg.appendChild(wrapper);

        // Track the furthest extent reached, padded by a full diameter.
        maxWidth = Math.max(maxWidth, centerX + bubbleRadius * 2);
        maxHeight = Math.max(maxHeight, centerY + bubbleRadius * 2);
    });
}



// Extents of everything drawn in the current render pass. draw() grows
// them and renderBubbles() resets them and sizes the viewBox from them.
// They are declared here explicitly: previously they only came into
// existence as implicit globals on first assignment, which throws a
// ReferenceError in strict mode or inside an ES module.
let maxWidth = 0;
let maxHeight = 0;

/**
 * Clear the chart and redraw it for the current view mode.
 *
 * When `showSharedOnly` is true, only the `lovecraft` entries whose word
 * is in `sharedWords` are drawn (in yellow). Otherwise both full lists are
 * drawn side by side, Lovecraft in green and Poe in purple. Afterwards the
 * viewBox is resized to the drawn extents plus a 100-unit margin.
 *
 * @param {{word: string, synset: number}[]} lovecraft - Lovecraft entries.
 * @param {{word: string, synset: number}[]} poe - Poe entries.
 */
function renderBubbles(lovecraft, poe) {
    // Remove every bubble from the previous render.
    svg.innerHTML = "";

    maxWidth = 0;
    maxHeight = 0;

    if (showSharedOnly) {
        // Build the lookup once instead of scanning sharedWords per entry.
        const sharedLookup = new Set(sharedWords);
        const shared = lovecraft.filter((d) => sharedLookup.has(d.word));

        draw(shared, 200, "#facc15", "#eab308"); // Yellow shared
    } else {
        draw(lovecraft, 150, "#69e864", "#339f0e"); // Green Lovecraft
        draw(poe, 750, "#c084fc", "#9333ea");       // Purple Poe
    }

    svg.setAttribute("viewBox", `0 0 ${maxWidth + 100} ${maxHeight + 100}`);
    svg.setAttribute("preserveAspectRatio", "xMidYMin meet");
}


renderBubbles(lovecraftData, poeData);


</script>

Python to create the Cytoscape // words.html


        import os

        import spacy

        from nltk.corpus import wordnet as wn

        import pandas as pd



        # spaCy pipeline used to parse each text; wordCollector reads the
        # part-of-speech tag (pos_) and lemma (lemma_) of the tokens it produces.
        nlp = spacy.load("en_core_web_md")


        # Directory that is scanned for the "*Sentences.txt" input files.
        collPath = 'corpora'


        def wordCollector(words, unit):
            """Collect the adjectives in `words` that have at least 15 WordNet synsets.

            Args:
                words: Iterable of tokens exposing `pos_` and `lemma_`
                    (the caller passes the result of `nlp(text)`).
                unit: Label stored with every collected word; the caller
                    passes the source file's name without its extension.

            Returns:
                A pandas DataFrame with one row per qualifying token and the
                columns 'word' (lemma), 'nodeType' (part-of-speech tag),
                'unit' and 'synset' (number of synsets found for the lemma).
                Repeated tokens produce repeated rows.
            """
            wordList = []
            nodeAtts = []
            unitList = []
            synsetCounts = []

            for token in words:
                if token.pos_ != "ADJ":
                    continue

                # Look the lemma up once and reuse the count for both the
                # threshold test and the stored value.
                # NOTE(review): no `pos` argument is passed to wn.synsets, so
                # the count presumably includes non-adjective senses -- confirm
                # that this is intended.
                synsets = len(wn.synsets(token.lemma_))
                if synsets >= 15:
                    wordList.append(token.lemma_)
                    nodeAtts.append(token.pos_)
                    unitList.append(unit)
                    synsetCounts.append(synsets)

            data = {
                'word': wordList,
                'nodeType': nodeAtts,
                'unit': unitList,
                'synset': synsetCounts}

            df = pd.DataFrame(data)
            return df


        # One DataFrame per processed input file.
        allDataFrames = []


        # Parse every "*Sentences.txt" file in collPath and collect its
        # qualifying adjectives, labelled with the file name minus extension.
        for file in os.listdir(collPath):
            if file.endswith("Sentences.txt"):
                filepath = f"{collPath}/{file}"
                name, extension = os.path.splitext(file)

                with open(filepath, 'r', encoding='utf8') as f:
                    readFile = f.read()

                spacyRead = nlp(readFile)
                myDataFrame = wordCollector(spacyRead, name)
                allDataFrames.append(myDataFrame)


        # Make an output filepath
        outputFilePath = 'adjFILTERED.tsv'

        # Turn the list of dataframes into one dataframe:
        fullDataFrame = pd.concat(allDataFrames, ignore_index=True)

        # Write tab-separated values without the DataFrame index column.
        fullDataFrame.to_csv(outputFilePath, sep='\t', index=False)

        print('I just saved a dataframe as a TSV file.')

        # Go check your filestash for the file.

Regex Key // regKey.md


            ## Regex Steps

            # Lovecraft


            1. Added `<xml>` over entire document.

            2. Found `^\n*(.+\n(?:.+\n)*)` and replaced it with `<para>$1</para>` to divide the work into paragraphs.

            3. Found Gutenberg headings, titles, and beginning quotes from other authors and wrapped them in a

            `<meta>` tag. Changed `<para>` tags to `<info>` tags within these for clarity.

                4. Found `(?<=<para>)([^<]+?)([.!?])` and wrapped in `<s>` tag for first sentence. Oxygen

                    didn't play nice with the para tags at the beginning, so this was necessary.

                    5. Marked the rest of the `<s>` tags by using `(?<=</s>)([^<]+?)([.!?])`. Had to

                    continually find and replace in order to get everything in a paragraph, as this simply

                    targeted the next sentence.

                    6. Searched for `"(.*?)"` to find items in quotes and wrapped them in a `<quote>` tag.


                    ** Note: most paragraphs that consist *entirely* of quotes do not have `<s>` tags.

                        I figured it could be valuable to have paragraphs with only quotations as a

                        datapoint, so I didn't remedy this.**


                        # Poe

                        1. Almost everything is the same as with the Lovecraft edits. However,

                        Poe often opens dialogue with a single, unclosed quotation mark (e.g., "Hello blah blah blah,

                        continue onto next line...) instead of enclosing it in a pair of quotation marks, which made the quotes

                        a little harder to track. Some provisions were made by searching for only one quotation mark,

                        but some quotes may have fallen through the cracks.

This is the human-readable key used for extracting sentences from the text. It doesn't include quotations, so as not to muddy the data. It also served to remove some special characters that the Project Gutenberg versions of the texts contained.