Skip to content

Commit

Permalink
Merge pull request #115 from evanofslack/color
Browse files Browse the repository at this point in the history
Add new table to handle post colors
  • Loading branch information
evanofslack authored Aug 7, 2023
2 parents 9af64ec + cf5b8fd commit 229be41
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 72 deletions.
1 change: 1 addition & 0 deletions backend/post.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ type Image struct {
// Color describes a single color attached to a post, as exchanged over JSON.
type Color struct {
// Hex is serialized under the "hex" key.
// NOTE(review): presumably a hex color code such as "#aabbcc" - confirm against the producer.
Hex string `json:"hex"`
// Css is serialized under the "css" key.
// NOTE(review): presumably the closest named CSS color - confirm against the producer.
Css string `json:"css"`
// Html is serialized under the "html" key and is left out of the JSON
// output entirely when it is the empty string (omitempty).
Html string `json:"html,omitempty"`
// Percent is serialized under the "percent" key.
// NOTE(review): presumably the fraction of the image covered by this color - confirm units.
Percent float64 `json:"percent"`
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Removes the colors table; IF EXISTS makes this a no-op when the table is absent.
-- NOTE(review): presumably the down migration paired with the colors table creation - confirm file name.
DROP TABLE IF EXISTS colors;
12 changes: 12 additions & 0 deletions backend/postgres/migrations/000005_create_colors_table.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- Creates the colors table: one row per color, each row owned by a row in pictures.
-- IF NOT EXISTS makes the statement safe to run when the table is already present.
CREATE TABLE IF NOT EXISTS colors(
-- surrogate key, auto-incremented
id SERIAL PRIMARY KEY,
hex VARCHAR(10) NOT NULL,
css VARCHAR(255) NOT NULL,
html VARCHAR(255) NOT NULL,
-- up to 9 significant digits, 8 of them after the decimal point; this column is nullable
percent NUMERIC(9, 8),
-- owning row in pictures (the column is named post_id, the referenced table is pictures)
post_id INT NOT NULL,
CONSTRAINT fk_post_id
FOREIGN KEY(post_id)
REFERENCES pictures(id)
-- deleting the referenced pictures row deletes its colors rows as well
ON DELETE CASCADE
);
148 changes: 119 additions & 29 deletions backend/postgres/post.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,77 @@ func (db *DB) deleteKeywords(ctx context.Context, tx *sql.Tx, postID int64) erro
return nil
}

// insertColors inserts a post's colors into the DB.
//
// All colors are written with one multi-row INSERT executed on tx, so they are
// committed or rolled back together with the rest of the transaction. Each
// color contributes one (hex, css, html, percent, post_id) tuple, with postID
// used as post_id for every row.
//
// An empty colors slice is a no-op that returns nil: an INSERT with an empty
// VALUES list is not valid SQL. Any error from the database is logged and
// returned unchanged.
func (db *DB) insertColors(ctx context.Context, tx *sql.Tx, colors []analogdb.Color, postID int64) error {

	db.logger.Debug().Ctx(ctx).Int64("postID", postID).Msg("Starting insert colors")

	// nothing to insert; sending "INSERT ... VALUES " with no tuples would fail
	if len(colors) == 0 {
		db.logger.Info().Ctx(ctx).Int64("postID", postID).Msg("Finished inserting colors (no colors provided)")
		return nil
	}

	// number of columns (and therefore placeholders) per inserted row
	const columns = 5

	vals := make([]any, 0, len(colors)*columns)
	inserts := make([]string, 0, len(colors))

	for i, c := range colors {
		// placeholders are numbered consecutively across rows: $1..$5, $6..$10, ...
		base := i * columns
		inserts = append(inserts, fmt.Sprintf("($%d, $%d, $%d, $%d, $%d)", base+1, base+2, base+3, base+4, base+5))
		vals = append(vals, c.Hex, c.Css, c.Html, c.Percent, postID)
	}

	query := `
	INSERT INTO colors
	(hex, css, html, percent, post_id)
	VALUES ` + strings.Join(inserts, ",")

	// the statement runs exactly once, so execute it directly instead of
	// preparing and closing a separate statement
	if _, err := tx.ExecContext(ctx, query, vals...); err != nil {
		db.logger.Error().Err(err).Ctx(ctx).Int64("postID", postID).Msg("Failed to insert colors")
		return err
	}

	db.logger.Info().Ctx(ctx).Int64("postID", postID).Msg("Finished inserting colors")

	return nil
}

// deleteColors deletes all colors for a given post.
//
// The DELETE is executed on tx, so it only takes effect if the surrounding
// transaction commits. Deleting for a post that has no colors is not an error.
// Any error from the database is logged and returned unchanged.
func (db *DB) deleteColors(ctx context.Context, tx *sql.Tx, postID int64) error {

	db.logger.Debug().Ctx(ctx).Int64("postID", postID).Msg("Starting delete colors")

	const query = "DELETE FROM colors WHERE post_id = $1"

	// DELETE returns no rows, so use ExecContext: there is no result set to
	// close, and no nil *sql.Rows to dereference when the statement fails.
	if _, err := tx.ExecContext(ctx, query, postID); err != nil {
		db.logger.Error().Err(err).Ctx(ctx).Int64("postID", postID).Msg("Failed to delete colors")
		return err
	}

	db.logger.Info().Ctx(ctx).Int64("postID", postID).Msg("Finished deleting colors")
	return nil
}

func (db *DB) createPost(ctx context.Context, tx *sql.Tx, post *analogdb.CreatePost) (*analogdb.Post, error) {

db.logger.Debug().Ctx(ctx).Msg("Starting create post")
Expand All @@ -313,7 +384,15 @@ func (db *DB) createPost(ctx context.Context, tx *sql.Tx, post *analogdb.CreateP
}
}

// commit transaction if both inserts are ok
// insert colors if they are provided
if len(post.Colors) != 0 {
err = db.insertColors(ctx, tx, post.Colors, *id)
if err != nil {
return nil, err
}
}

// commit transaction if all inserts are ok
err = tx.Commit()
if err != nil {
db.logger.Error().Err(err).Ctx(ctx).Int64("postID", *id).Msg("Failed to create post")
Expand Down Expand Up @@ -445,14 +524,15 @@ func (db *DB) patchPost(ctx context.Context, tx *sql.Tx, patch *analogdb.PatchPo

hasPatchFields := false

// if the patch includes updates for the post
if patch.Nsfw != nil || patch.Sprocket != nil || patch.Grayscale != nil || patch.Score != nil || patch.Colors != nil {
// if the patch includes general updates for the post
if patch.Nsfw != nil || patch.Sprocket != nil || patch.Grayscale != nil || patch.Score != nil {
hasPatchFields = true
if err := db.updatePost(ctx, tx, patch, id); err != nil {
if err := db.updatePostGeneral(ctx, tx, patch, id); err != nil {
db.logger.Error().Err(err).Ctx(ctx).Int("postID", id).Msg("Failed to patch post")
return err
}
}

// if the patch includes updates for keywords
if patch.Keywords != nil {
hasPatchFields = true
Expand All @@ -462,6 +542,15 @@ func (db *DB) patchPost(ctx context.Context, tx *sql.Tx, patch *analogdb.PatchPo
}
}

// if the patch includes updates for colors
if patch.Colors != nil {
hasPatchFields = true
if err := db.updateColors(ctx, tx, *patch.Colors, id); err != nil {
db.logger.Error().Err(err).Ctx(ctx).Int("postID", id).Msg("Failed to patch post")
return err
}
}

if !hasPatchFields {
err := errors.New("must include patch parameters")
db.logger.Error().Err(err).Ctx(ctx).Int("postID", id).Msg("Failed to patch post")
Expand Down Expand Up @@ -511,7 +600,32 @@ func (db *DB) updateKeywords(ctx context.Context, tx *sql.Tx, keywords []analogd
return nil
}

func (db *DB) updatePost(ctx context.Context, tx *sql.Tx, patch *analogdb.PatchPost, id int) error {
// updateColors replaces the colors stored for post id with the given colors.
//
// It first deletes every existing color for the post and then, when colors is
// non-empty, inserts the new set. Both steps run on tx. An empty colors slice
// therefore just clears the post's colors. The first error encountered is
// returned unchanged.
func (db *DB) updateColors(ctx context.Context, tx *sql.Tx, colors []analogdb.Color, id int) error {

	// the delete/insert helpers take the post ID as int64
	postID := int64(id)

	db.logger.Debug().Ctx(ctx).Int("postID", id).Msg("Starting update colors")

	// step 1: clear out whatever is currently stored for this post
	err := db.deleteColors(ctx, tx, postID)
	if err != nil {
		return err
	}

	// step 2: write the replacement set, if there is one
	if len(colors) > 0 {
		err = db.insertColors(ctx, tx, colors, postID)
		if err != nil {
			db.logger.Error().Err(err).Ctx(ctx).Int("postID", id).Msg("Failed to update colors")
			return err
		}

		db.logger.Info().Ctx(ctx).Int("postID", id).Msg("Finished updating colors")
		return nil
	}

	// nothing to insert: the delete above was the whole update
	db.logger.Info().Ctx(ctx).Int("postID", id).Msg("Finished updating colors (dropped all colors)")
	return nil
}

func (db *DB) updatePostGeneral(ctx context.Context, tx *sql.Tx, patch *analogdb.PatchPost, id int) error {

db.logger.Debug().Ctx(ctx).Int("postID", id).Msg("Starting update post")

Expand Down Expand Up @@ -848,30 +962,6 @@ func patchToSet(patch *analogdb.PatchPost) (string, []any, error) {
args = append(args, *sprocket)
index += 1
}
if colors := patch.Colors; colors != nil {
if len(*colors) != 5 {
return "", args, fmt.Errorf("Invalid color array provided, expected %d colors, got %d", 5, len(*colors))
}

// for each color in colors, we need to append hex, css and percent fields
for i, color := range *colors {

// add the hex
set = append(set, fmt.Sprintf("c%d_hex = $%d", i+1, index))
args = append(args, color.Hex)
index += 1

// add the css
set = append(set, fmt.Sprintf("c%d_css = $%d", i+1, index))
args = append(args, color.Css)
index += 1

// add the percent
set = append(set, fmt.Sprintf("c%d_percent = $%d", i+1, index))
args = append(args, color.Percent)
index += 1
}
}

// no update fields provided
if len(set) == 0 {
Expand Down
38 changes: 3 additions & 35 deletions scraper/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,7 @@ def get_keyword_updated_post_ids(username: str, password: str) -> List[int]:
auth=HTTPBasicAuth(username=username, password=password),
)
if r.status_code != 200:
raise Exception(
f"failed to fetch {path} with response: {r.json()}"
)
raise Exception(f"failed to fetch {path} with response: {r.json()}")
try:
data = r.json()
except Exception as e:
Expand Down Expand Up @@ -202,7 +200,7 @@ def json_to_post(data: dict) -> AnalogDisplayPost:

def post_to_json(post: AnalogPost):
images = post_to_json_images(post)
colors = post_to_json_colors(post)
colors = colors_to_json(post.colors)
keywords = keywords_to_json(post.keywords)
body = {
"title": post.title,
Expand Down Expand Up @@ -249,36 +247,6 @@ def post_to_json_images(post: AnalogPost) -> List[dict]:
return [low, med, high, raw]


def post_to_json_colors(post: AnalogPost) -> List[dict]:
    """Collect the five numbered color slots of ``post`` into a list of dicts.

    Reads ``c<N>_hex``, ``c<N>_css`` and ``c<N>_percent`` from ``post`` for
    N = 1..5 and returns one ``{"hex", "css", "percent"}`` dict per slot, in
    slot order.
    """
    json_colors = []
    # slots are numbered c1 .. c5 on the post
    for slot in range(1, 6):
        prefix = f"c{slot}_"
        json_colors.append(
            {
                "hex": getattr(post, prefix + "hex"),
                "css": getattr(post, prefix + "css"),
                "percent": getattr(post, prefix + "percent"),
            }
        )
    return json_colors


def keywords_to_json(keywords: List[AnalogKeyword]) -> List[dict]:

json_keywords: List[dict] = []
Expand All @@ -292,7 +260,7 @@ def colors_to_json(colors: List[Color]) -> List[dict]:
# expected 5 colors from highest to lowest percent
json_colors = []
for c in colors:
temp = {"hex": c.hex, "css": c.css, "percent": c.percent}
temp = {"hex": c.hex, "css": c.css, "html": c.html, "percent": c.percent}
json_colors.append(temp)
return json_colors

Expand Down
30 changes: 25 additions & 5 deletions scraper/image_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from PIL import ImageChops
from PIL.Image import ANTIALIAS, Image, new, open
from scipy.spatial import KDTree
from webcolors import CSS3_HEX_TO_NAMES, hex_to_rgb, rgb_to_hex
from webcolors import (CSS3_HEX_TO_NAMES, HTML4_HEX_TO_NAMES, hex_to_rgb,
rgb_to_hex)

from constants import COLOR_LIMIT, COLOR_TOLERANCE, LOW_RES
from models import Color
Expand Down Expand Up @@ -60,7 +61,7 @@ def remove_border(image: Image) -> Image:
return image


def rgb_to_name(rgb: Tuple[int, int, int]) -> str:
def rgb_to_css(rgb: Tuple[int, int, int]) -> str:
# use KDTree to find closest CSS name for RGB color

names = []
Expand All @@ -76,6 +77,22 @@ def rgb_to_name(rgb: Tuple[int, int, int]) -> str:
return match


def rgb_to_html(rgb: Tuple[int, int, int]) -> str:
    """Return the name from HTML4_HEX_TO_NAMES that best matches ``rgb``.

    Every hex key of HTML4_HEX_TO_NAMES is converted to an RGB triple, the
    triples are indexed in a KDTree, and the name belonging to the triple the
    tree returns for ``rgb`` is the result.
    """
    # parallel lists: candidates[i] is the RGB triple for labels[i]
    labels = list(HTML4_HEX_TO_NAMES.values())
    candidates = [hex_to_rgb(code) for code in HTML4_HEX_TO_NAMES.keys()]

    # query() yields (distance, index); only the index is needed
    _, nearest = KDTree(candidates).query(rgb)
    return labels[nearest]


def extract_colors(image: Image, count: int = COLOR_LIMIT) -> List[Color]:
"""
Expand All @@ -99,19 +116,22 @@ def extract_colors(image: Image, count: int = COLOR_LIMIT) -> List[Color]:
hex = rgb_to_hex(rgb)

# get closest matching css color
css = rgb_to_name(rgb)
css = rgb_to_css(rgb)

# get closest matching html color
html = rgb_to_html(rgb)

# get percent of image with this color
percent = round(pixels / total_pixels, 8)

# append it
extracted.append(Color(hex=hex, css=css, percent=percent))
extracted.append(Color(hex=hex, css=css, html=html, percent=percent))

# we need to send 5 colors to analogdb
# if we don't have 5 colors, append fillers
num_filler = COLOR_LIMIT - len(extracted)
if num_filler > 0:
filler = Color(hex="null", css="null", percent=0.0)
filler = Color(hex="null", css="null", html="null", percent=0.0)
for _ in range(num_filler):
extracted.append(filler)

Expand Down
12 changes: 9 additions & 3 deletions scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from loguru import logger

from api import get_latest_links, upload_to_analogdb
from batch import update_posts_keywords, update_posts_scores
from batch import (update_posts_colors, update_posts_keywords,
update_posts_scores)
from comment import get_comments, post_keywords
from configuration import dependencies_from_config, init_config
from constants import (ANALOG_POSTS, ANALOG_SUB, BW_POSTS, BW_SUB,
Expand Down Expand Up @@ -86,15 +87,20 @@ def update_keywords(deps: Dependencies):
update_posts_keywords(deps=deps, count=100, limit=KEYWORD_LIMIT)


# Job entry point for refreshing post colors: forwards deps to
# update_posts_colors with count=100.
# NOTE(review): count presumably caps how many posts are handled per run - confirm in batch.py.
# Wrapped in logger.catch with the message below, so - per loguru's documented
# behavior - an exception raised inside is logged rather than propagated.
@logger.catch(message="caught error while updating post colors")
def update_colors(deps: Dependencies):
update_posts_colors(deps=deps, count=100)


def run_schedule(deps: Dependencies):

# scrape posts
schedule.every().day.do(scrape_bw, deps=deps)
schedule.every().day.do(scrape_sprocket, deps=deps)
schedule.every(4).hours.do(scrape_analog, deps=deps)

schedule.every().day.do(update_scores, deps=deps)
schedule.every().day.do(update_keywords, deps=deps)
# schedule.every().day.do(update_scores, deps=deps)
# schedule.every().day.do(update_keywords, deps=deps)

schedule.run_all()

Expand Down
Loading

0 comments on commit 229be41

Please sign in to comment.