From d0305f37283f13495b772499847038dbd00a4509 Mon Sep 17 00:00:00 2001 From: Mutasim <154865594+MutasimBinNazmul@users.noreply.github.com> Date: Sat, 4 May 2024 08:47:34 +0600 Subject: [PATCH 1/5] Update index.ts --- tensor-api/index.ts | 110 ++++++++++++++++++++++++-------------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/tensor-api/index.ts b/tensor-api/index.ts index 3d48cac..db4fe31 100644 --- a/tensor-api/index.ts +++ b/tensor-api/index.ts @@ -1,33 +1,33 @@ -import * as tf from "@tensorflow/tfjs-node" -import fs from "fs" -import csv from "csv-parser" -import { Transform } from "stream" -import OpenAI from "openai" +import * as tf from "@tensorflow/tfjs-node"; +import fs from "fs"; +import csv from "csv-parser"; +import { Transform } from "stream"; +import OpenAI from "openai"; const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY, -}) +}); interface Row { - embedding: string - rating: string + embedding: string; + rating: string; } function createLineRangeStream(startLine: number, endLine: number) { - let currentLine = 0 + let currentLine = 0; return new Transform({ transform(chunk, _, callback) { if (currentLine >= startLine && currentLine < endLine) { - this.push(chunk) + this.push(chunk); } - currentLine++ + currentLine++; if (currentLine >= endLine) { - this.push(null) + this.push(null); } - callback() + callback(); }, objectMode: true, - }) + }); } async function parseCSV( @@ -36,26 +36,32 @@ async function parseCSV( endLine: number ): Promise { return new Promise((resolve, reject) => { - const rows: Row[] = [] + const rows: Row[] = []; fs.createReadStream(filePath) .pipe(csv({ separator: "|" })) .pipe(createLineRangeStream(startLine, endLine)) .on("data", (row) => { - rows.push(row) + rows.push(row); }) .on("error", (error) => { - reject(error) + reject(error); }) .on("end", () => { - resolve(rows) - }) - }) + resolve(rows); + }); + }); } class AI { + model: tf.Sequential; + + constructor() { + this.model = this.compile(); + } + compile() { - const model = tf.sequential() + const model = tf.sequential(); // input layer model.add( @@ -63,7 +69,7 @@ class AI { units: 3, inputShape: [1536], }) - ) + ); // output layer model.add( @@ -71,59 +77,63 @@ class AI { units: 1, activation: "sigmoid", }) - ) + ); model.compile({ loss: "binaryCrossentropy", optimizer: "sgd", metrics: ["accuracy"], - }) + }); - return model + return model; } - async run() { - const model = this.compile() - - const data = await parseCSV("prepared_dataset.csv", 0, 45000) + async train() { + const data = await parseCSV("prepared_dataset.csv", 0, 45000); const converted = data.map((row) => ({ embedding: JSON.parse(row.embedding), rating: Number(row.rating), - })) - - const xsConverted = converted.map(({ embedding }) => embedding) + })); - const ysConverted = converted.map(({ rating }) => [rating]) + const xsConverted = converted.map(({ embedding }) => embedding); - console.log(xsConverted, ysConverted) + const ysConverted = converted.map(({ rating }) => [rating]); - const xs = tf.tensor2d(xsConverted) + const xs = tf.tensor2d(xsConverted); - const ys = tf.tensor2d(ysConverted) + const ys = tf.tensor2d(ysConverted); - await model.fit(xs, ys, { + await this.model.fit(xs, ys, { epochs: 250, - }) - - const testText = "hello world" // no flagging expected + }); + } + async predict(text: string) { const stuff = await openai.embeddings.create({ - input: testText, + input: text, model: "text-embedding-3-small", - }) + }); - const vector = stuff.data[0].embedding + const vector = stuff.data[0].embedding; - const example = tf.tensor2d([vector]) - const prediction = model.predict(example) + const example = tf.tensor2d([vector]); + const prediction = this.model.predict(example); - // @ts-ignore - prediction.print() + return prediction.dataSync()[0]; + } - await model.save("file://./profanity-model") + async save() { + await this.model.save("file://./profanity-model"); } } -const ai = new AI() -ai.run() +async function main() { + const ai = new AI(); + await ai.train(); + const prediction = await ai.predict("hello world"); + console.log("Prediction:", prediction); + await ai.save(); +} + +main(); From e77ba53944d5789568b706ff2de838b922d07031 Mon Sep 17 00:00:00 2001 From: Mutasim <154865594+MutasimBinNazmul@users.noreply.github.com> Date: Sat, 4 May 2024 08:48:24 +0600 Subject: [PATCH 2/5] Update predict.ts --- tensor-api/predict.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensor-api/predict.ts b/tensor-api/predict.ts index 7b34228..ca2f043 100644 --- a/tensor-api/predict.ts +++ b/tensor-api/predict.ts @@ -18,6 +18,10 @@ app.post("/", async (c) => { const body = await c.req.json() const { message } = body + if (!message) { + return c.json({ error: "Missing message in request body" }, 400) + } + const openaiRes = await openai.embeddings.create({ input: message, model: "text-embedding-3-small", @@ -43,7 +47,8 @@ app.post("/", async (c) => { note: "1 is very toxic/profane, 0 is not profane at all", }) } catch (err) { - return c.json({ error: JSON.stringify(err) }) + console.error(err) + return c.json({ error: "An error occurred during prediction" }, 500) } }) From 93a5c71e49f4e2165c317d64f8d3fc5a65a8d65d Mon Sep 17 00:00:00 2001 From: Mutasim <154865594+MutasimBinNazmul@users.noreply.github.com> Date: Sat, 4 May 2024 08:49:12 +0600 Subject: [PATCH 3/5] Update prepare.ts --- tensor-api/prepare.ts | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/tensor-api/prepare.ts b/tensor-api/prepare.ts index 2659ca9..dc7ad36 100644 --- a/tensor-api/prepare.ts +++ b/tensor-api/prepare.ts @@ -5,7 +5,7 @@ import { OpenAI } from "openai" const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY, -}) +}); const writeStream = fs.createWriteStream("prepared_dataset.csv", { flags: "a" }) @@ -69,26 +69,32 @@ const prepare = async () => { const data = await parseCSV("raw_dataset.csv", start, end) - data.forEach(async (row) => { - const hate = Number(row.severe_toxic) - const obscene = Number(row.obscene) - const insult = Number(row.insult) - const identity = Number(row.identity_hate) - const threat = Number(row.threat) + for (const row of data) { + try { + const hate = Number(row.severe_toxic) + const obscene = Number(row.obscene) + const insult = Number(row.insult) + const identity = Number(row.identity_hate) + const threat = Number(row.threat) - const isFlagged = hate || obscene || insult || identity || threat + const isFlagged = hate || obscene || insult || identity || threat - const stuff = await openai.embeddings.create({ - input: row.comment_text, - model: "text-embedding-3-small", - }) + const stuff = await openai.embeddings.create({ + input: row.comment_text, + model: "text-embedding-3-small", + }) - const vector = stuff.data[0].embedding + const vector = stuff.data[0].embedding - writeStream.write(`[${vector}]|${isFlagged ? 1 : 0}` + "\n") + writeStream.write(`[${vector}]|${isFlagged ? 1 : 0}` + "\n") - await new Promise((resolve) => setTimeout(resolve, 500)) - }) + await new Promise((resolve) => setTimeout(resolve, 500)) + } catch (error) { + console.error( + `Error processing row ${row.id}: ${error.message}` + ) + } + } } } From 9f4981adbd27c8068936b3d2c7a5789f7a7fdfe9 Mon Sep 17 00:00:00 2001 From: Mutasim <154865594+MutasimBinNazmul@users.noreply.github.com> Date: Sat, 4 May 2024 08:50:12 +0600 Subject: [PATCH 4/5] Update model.json --- tensor-api/profanity-model/model.json | 151 +++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 1 deletion(-) diff --git a/tensor-api/profanity-model/model.json b/tensor-api/profanity-model/model.json index 2e044e5..0d1c160 100644 --- a/tensor-api/profanity-model/model.json +++ b/tensor-api/profanity-model/model.json @@ -1 +1,150 @@ -{"modelTopology":{"class_name":"Sequential","config":{"name":"sequential_1","layers":[{"class_name":"Dense","config":{"units":3,"activation":"linear","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"normal","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"dense_Dense1","trainable":true,"batch_input_shape":[null,1536],"dtype":"float32"}},{"class_name":"Dense","config":{"units":1,"activation":"sigmoid","use_bias":true,"kernel_initializer":{"class_name":"VarianceScaling","config":{"scale":1,"mode":"fan_avg","distribution":"normal","seed":null}},"bias_initializer":{"class_name":"Zeros","config":{}},"kernel_regularizer":null,"bias_regularizer":null,"activity_regularizer":null,"kernel_constraint":null,"bias_constraint":null,"name":"dense_Dense2","trainable":true}}]},"keras_version":"tfjs-layers 4.18.0","backend":"tensor_flow.js"},"weightsManifest":[{"paths":["weights.bin"],"weights":[{"name":"dense_Dense1/kernel","shape":[1536,3],"dtype":"float32"},{"name":"dense_Dense1/bias","shape":[3],"dtype":"float32"},{"name":"dense_Dense2/kernel","shape":[3,1],"dtype":"float32"},{"name":"dense_Dense2/bias","shape":[1],"dtype":"float32"}]}],"format":"layers-model","generatedBy":"TensorFlow.js tfjs-layers v4.18.0","convertedBy":null} \ No newline at end of file +{ + "modelTopology": { + "class_name": "Sequential", + "config": { + "name": "sequential_1", + "layers": [ + { + "class_name": "Dense", + "config": { + "units": 128, + "activation": "relu", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1, + "mode": "fan_avg", + "distribution": "normal", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": { + "class_name": "L2", + "config": { + "l2": 0.01 + } + }, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null, + "name": "dense_Dense1", + "trainable": true, + "batch_input_shape": [null, 1536], + "dtype": "float32" + } + }, + { + "class_name": "Dense", + "config": { + "units": 64, + "activation": "relu", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1, + "mode": "fan_avg", + "distribution": "normal", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": { + "class_name": "L2", + "config": { + "l2": 0.01 + } + }, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null, + "name": "dense_Dense2", + "trainable": true, + "batch_input_shape": [null, 128], + "dtype": "float32" + } + }, + { + "class_name": "Dense", + "config": { + "units": 1, + "activation": "sigmoid", + "use_bias": true, + "kernel_initializer": { + "class_name": "VarianceScaling", + "config": { + "scale": 1, + "mode": "fan_avg", + "distribution": "normal", + "seed": null + } + }, + "bias_initializer": { + "class_name": "Zeros", + "config": {} + }, + "kernel_regularizer": null, + "bias_regularizer": null, + "activity_regularizer": null, + "kernel_constraint": null, + "bias_constraint": null, + "name": "dense_Dense3", + "trainable": true, + "batch_input_shape": [null, 64], + "dtype": "float32" + } + } + ] + } + }, + "weightsManifest": [ + { + "paths": ["weights.bin"], + "weights": [ + { + "name": "dense_Dense1/kernel", + "shape": [1536, 128], + "dtype": "float32" + }, + { + "name": "dense_Dense1/bias", + "shape": [128], + "dtype": "float32" + }, + { + "name": "dense_Dense2/kernel", + "shape": [128, 64], + "dtype": "float32" + }, + { + "name": "dense_Dense2/bias", + "shape": [64], + "dtype": "float32" + }, + { + "name": "dense_Dense3/kernel", + "shape": [64, 1], + "dtype": "float32" + }, + { + "name": "dense_Dense3/bias", + "shape": [1], + "dtype": "float32" + } + ] + } + ], + "format": "layers-model", + "generatedBy": "TensorFlow.js tfjs-layers v4.18.0", + "convertedBy": null +} From 1e8db1b4cd40f5a3471d3d13bf643272daa15dc2 Mon Sep 17 00:00:00 2001 From: Mutasim <154865594+MutasimBinNazmul@users.noreply.github.com> Date: Sat, 4 May 2024 08:51:51 +0600 Subject: [PATCH 5/5] Update index.ts